/*
 *  arch/s390/kernel/smp.c
 *
 *  S390 version
 *    Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
 *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  based on other smp stuff by
 *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
 *    (c) 1998 Ingo Molnar
 *
 * We work with logical cpu numbering everywhere we can. The only
 * functions using the real cpu address (got from STAP) are the sigp
 * functions. For all other functions we use the identity mapping.
 * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is
 * used e.g. to find the idle task belonging to a logical cpu. Every array
 * in the kernel is sorted by the logical cpu number and not by the physical
 * one which is causing all the confusion with __cpu_logical_map and
 * cpu_number_map in other architectures.
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/delay.h>

#include <asm/sigp.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>

#include "cpcmd.h"

/* prototypes */
extern void update_one_process(struct task_struct *p,
                               unsigned long ticks, unsigned long user,
                               unsigned long system, int cpu);
extern int cpu_idle(void *unused);

extern __u16 boot_cpu_addr;

/*
 * An array with a pointer to the lowcore of every CPU.
 */
static int max_cpus = NR_CPUS;    /* Setup configured maximum number of CPUs to activate */
int smp_num_cpus;
struct _lowcore *lowcore_ptr[NR_CPUS];
unsigned int prof_multiplier[NR_CPUS];
unsigned int prof_old_multiplier[NR_CPUS];
unsigned int prof_counter[NR_CPUS];
volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
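/*
 * Entry 0 of __cpu_logical_map is set to boot_cpu_addr and the remaining
 * entries are filled in by smp_count_cpus(); as noted in the header comment,
 * the sigp functions are the only place the physical cpu address is used.
 */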
cycles_t cacheflush_time = 0;
int smp_threads_ready = 0;        /* Set when the idlers are all forked. */
unsigned long ipi_count = 0;      /* Number of IPIs delivered. */
static atomic_t smp_commenced = ATOMIC_INIT(0);

spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;

/*
 *      Setup routine for controlling SMP activation
 *
 *      Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 *      activation entirely (the MPS table probe still happens, though).
 *
 *      Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 *      greater than 0, limits the maximum number of CPUs activated in
 *      SMP mode to <NUM>.
 */

static int __init nosmp(char *str)
{
        max_cpus = 0;
        return 1;
}

__setup("nosmp", nosmp);

static int __init maxcpus(char *str)
{
        get_option(&str, &max_cpus);
        return 1;
}

__setup("maxcpus=", maxcpus);

/*
 * Reboot, halt and power_off routines for SMP.
 */
extern char vmhalt_cmd[];
extern char vmpoff_cmd[];

extern void reipl(unsigned long devno);

void do_machine_restart(void)
{
        smp_send_stop();
        reipl(S390_lowcore.ipl_device);
}
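
/*
 * machine_restart, machine_halt and machine_power_off below share a pattern:
 * when called on a secondary cpu they signal the boot cpu (logical cpu 0)
 * with the corresponding ec_* bit and spin forever, so the actual work is
 * always carried out by cpu 0 after it has stopped all other cpus.
 */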
void machine_restart(char *__unused)
{
        if (smp_processor_id() != 0) {
                smp_ext_call_async(0, ec_restart);
                for (;;);
        } else
                do_machine_restart();
}

void do_machine_halt(void)
{
        smp_send_stop();
        if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0)
                cpcmd(vmhalt_cmd, NULL, 0);
        disabled_wait(0);
}

void machine_halt(void)
{
        if (smp_processor_id() != 0) {
                smp_ext_call_async(0, ec_halt);
                for (;;);
        } else
                do_machine_halt();
}

void do_machine_power_off(void)
{
        smp_send_stop();
        if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0)
                cpcmd(vmpoff_cmd, NULL, 0);
        disabled_wait(0);
}

void machine_power_off(void)
{
        if (smp_processor_id() != 0) {
                smp_ext_call_async(0, ec_power_off);
                for (;;);
        } else
                do_machine_power_off();
}

/*
 * This is the main routine where commands issued by other
 * cpus are handled.
 */

void do_ext_call_interrupt(__u16 source_cpu_addr)
{
        ec_ext_call *ec, *next;
        int bits;

        /*
         * handle bit signal external calls
         *
         * For the ec_schedule signal we have to do nothing. All the work
         * is done automatically when we return from the interrupt.
         * For the ec_restart, ec_halt and ec_power_off we call the
         * appropriate routine.
         */
        do {
                bits = atomic_read(&S390_lowcore.ext_call_fast);
        } while (atomic_compare_and_swap(bits, 0, &S390_lowcore.ext_call_fast));
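
        /*
         * The loop above atomically snapshots ext_call_fast and resets it to
         * zero in a single compare-and-swap, retrying until the swap succeeds,
         * so every bit signal is observed exactly once.
         */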

        if (test_bit(ec_restart, &bits))
                do_machine_restart();
        if (test_bit(ec_halt, &bits))
                do_machine_halt();
        if (test_bit(ec_power_off, &bits))
                do_machine_power_off();

        /*
         * Handle external call commands with a parameter area
         */
        do {
                ec = (ec_ext_call *) atomic_read(&S390_lowcore.ext_call_queue);
        } while (atomic_compare_and_swap((int) ec, 0,
                                         &S390_lowcore.ext_call_queue));
        if (ec == NULL)
                return;   /* no command signals */

        /* Make a fifo out of the lifo */
        next = ec->next;
        ec->next = NULL;
        while (next != NULL) {
                ec_ext_call *tmp = next->next;
                next->next = ec;
                ec = next;
                next = tmp;
        }
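        /*
         * Senders push onto ext_call_queue LIFO-fashion (newest entry first),
         * so the in-place list reversal above restores arrival order before
         * the commands are executed.
         */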

        /* Execute every sigp command on the queue */
        while (ec != NULL) {
                switch (ec->cmd) {
                case ec_get_ctl: {
                        ec_creg_parms *pp;
                        pp = (ec_creg_parms *) ec->parms;
                        atomic_set(&ec->status, ec_executing);
                        asm volatile (
                                "   bras  1,0f\n"
                                "   stctl 0,0,0(%0)\n"
                                "0: ex    %1,0(1)\n"
                                : : "a" (pp->cregs+pp->start_ctl),
                                    "a" ((pp->start_ctl<<4) + pp->end_ctl)
                                : "memory", "1" );
                        atomic_set(&ec->status, ec_done);
                        return;
                }
                case ec_set_ctl: {
                        ec_creg_parms *pp;
                        pp = (ec_creg_parms *) ec->parms;
                        atomic_set(&ec->status, ec_executing);
                        asm volatile (
                                "   bras  1,0f\n"
                                "   lctl  0,0,0(%0)\n"
                                "0: ex    %1,0(1)\n"
                                : : "a" (pp->cregs+pp->start_ctl),
                                    "a" ((pp->start_ctl<<4) + pp->end_ctl)
                                : "memory", "1" );
                        atomic_set(&ec->status, ec_done);
                        return;
                }
                case ec_set_ctl_masked: {
                        ec_creg_mask_parms *pp;
                        u32 cregs[16];
                        int i;

                        pp = (ec_creg_mask_parms *) ec->parms;
                        atomic_set(&ec->status, ec_executing);
                        asm volatile (
                                "   bras  1,0f\n"
                                "   stctl 0,0,0(%0)\n"
                                "0: ex    %1,0(1)\n"
                                : : "a" (cregs+pp->start_ctl),
                                    "a" ((pp->start_ctl<<4) + pp->end_ctl)
                                : "memory", "1" );
                        for (i = pp->start_ctl; i <= pp->end_ctl; i++)
                                cregs[i] = (cregs[i] & pp->andvals[i])
                                           | pp->orvals[i];
                        asm volatile (
                                "   bras  1,0f\n"
                                "   lctl  0,0,0(%0)\n"
                                "0: ex    %1,0(1)\n"
                                : : "a" (cregs+pp->start_ctl),
                                    "a" ((pp->start_ctl<<4) + pp->end_ctl)
                                : "memory", "1" );
                        atomic_set(&ec->status, ec_done);
                        return;
                }
                default:
                        break;
                }
                ec = ec->next;
        }
}

/*
 * Send an external call sigp to another cpu and wait for its completion.
 */
sigp_ccode smp_ext_call_sync(int cpu, ec_cmd_sig cmd, void *parms)
{
        struct _lowcore *lowcore = &get_cpu_lowcore(cpu);
        sigp_ccode ccode;
        ec_ext_call ec;

        ec.cmd = cmd;
        atomic_set(&ec.status, ec_pending);
        ec.parms = parms;
        do {
                ec.next = (ec_ext_call *) atomic_read(&lowcore->ext_call_queue);
        } while (atomic_compare_and_swap((int) ec.next, (int)(&ec),
                                         &lowcore->ext_call_queue));
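        /*
         * This is a lock-free push of the on-stack ec_ext_call onto the
         * target cpu's ext_call_queue: the current head is re-read into
         * ec.next and the compare-and-swap is retried until no other sender
         * has changed the head in the meantime.
         */
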
        /*
         * We try once to deliver the signal. There are four possible
         * return codes:
         * 0) Order code accepted - can't show up on an external call
         * 1) Status stored - fine, wait for completion.
         * 2) Busy - there is another signal pending. That's fine too, because
         *    do_ext_call from the pending signal will execute all signals on
         *    the queue. We wait for completion.
         * 3) Not operational - something very bad has happened to the cpu.
         *    do not wait for completion.
         */
        ccode = signal_processor(cpu, sigp_external_call);

        if (ccode != sigp_not_operational)
                /* wait for completion, FIXME: possible seed of a deadlock */
                while (atomic_read(&ec.status) != ec_done);

        return ccode;
}

/*
 * Send an external call sigp to another cpu and return without waiting
 * for its completion. Currently we do not support parameters with
 * asynchronous sigps.
 */
sigp_ccode smp_ext_call_async(int cpu, ec_bit_sig sig)
{
        struct _lowcore *lowcore = &get_cpu_lowcore(cpu);
        sigp_ccode ccode;

        /*
         * Set signaling bit in lowcore of target cpu and kick it
         */
        atomic_set_mask(1<<sig, &lowcore->ext_call_fast);
        ccode = signal_processor(cpu, sigp_external_call);
        return ccode;
}

/*
 * Send an external call sigp to every other cpu in the system and
 * wait for the completion of the sigps.
 */
void smp_ext_call_sync_others(ec_cmd_sig cmd, void *parms)
{
        struct _lowcore *lowcore;
        ec_ext_call ec[NR_CPUS];
        sigp_ccode ccode;
        int i;

        for (i = 0; i < smp_num_cpus; i++) {
                if (smp_processor_id() == i)
                        continue;
                lowcore = &get_cpu_lowcore(i);
                ec[i].cmd = cmd;
                atomic_set(&ec[i].status, ec_pending);
                ec[i].parms = parms;
                do {
                        ec[i].next = (ec_ext_call *)
                                atomic_read(&lowcore->ext_call_queue);
                } while (atomic_compare_and_swap((int) ec[i].next, (int)(ec+i),
                                                 &lowcore->ext_call_queue));
                ccode = signal_processor(i, sigp_external_call);
        }

        /* wait for completion, FIXME: possible seed of a deadlock */
        for (i = 0; i < smp_num_cpus; i++) {
                if (smp_processor_id() == i)
                        continue;
                while (atomic_read(&ec[i].status) != ec_done);
        }
}

/*
 * Send an external call sigp to every other cpu in the system and
 * return without waiting for the completion of the sigps. Currently
 * we do not support parameters with asynchronous sigps.
 */
void smp_ext_call_async_others(ec_bit_sig sig)
{
        struct _lowcore *lowcore;
        sigp_ccode ccode;
        int i;

        for (i = 0; i < smp_num_cpus; i++) {
                if (smp_processor_id() == i)
                        continue;
                lowcore = &get_cpu_lowcore(i);
                /*
                 * Set signaling bit in lowcore of target cpu and kick it
                 */
                atomic_set_mask(1<<sig, &lowcore->ext_call_fast);
                ccode = signal_processor(i, sigp_external_call);
        }
}

/*
 * cycles through all the cpus,
 * returns early if info is not NULL & the processor has something
 * of interest to report in the info structure.
 * it returns the next cpu to check if it returns early.
 * i.e. it should be used as follows if you wish to receive info.
 * next_cpu = 0;
 * do
 * {
 *      info->cpu = next_cpu;
 *      next_cpu = smp_signal_others(order_code, parameter, 1, info);
 *      ... check info here
 * } while (next_cpu < smp_num_cpus);
 *
 * if you are lazy just use it like
 * smp_signal_others(order_code, parameter, 1, NULL);
 */
int smp_signal_others(sigp_order_code order_code, u32 parameter,
                      int spin, sigp_info *info)
{
        sigp_ccode ccode;
        u32 dummy;
        u16 i;

        if (info)
                info->intresting = 0;
        for (i = (info ? info->cpu : 0); i < smp_num_cpus; i++) {
                if (smp_processor_id() != i) {
                        do {
                                ccode = signal_processor_ps(
                                        (info ? &info->status : &dummy),
                                        parameter, i, order_code);
                        } while (spin && ccode == sigp_busy);
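                        /*
                         * With spin != 0 the sigp is retried for as long as
                         * the target reports busy; otherwise a busy condition
                         * code is treated like any other non-accepted ccode
                         * and reported through *info below (if info is set).
                         */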
                        if (info && ccode != sigp_order_code_accepted) {
                                info->intresting = 1;
                                info->cpu = i;
                                info->ccode = ccode;
                                i++;
                                break;
                        }
                }
        }
        return i;
}

/*
 * this function sends a 'stop' sigp to all other CPUs in the system.
 * it goes straight through.
 */

void smp_send_stop(void)
{
        smp_signal_others(sigp_stop, 0, 1, NULL);
}

/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */

void smp_send_reschedule(int cpu)
{
        smp_ext_call_async(cpu, ec_schedule);
}

/*
 * Set a bit in a control register of all cpus
 */
void smp_ctl_set_bit(int cr, int bit) {
        ec_creg_mask_parms parms;

        if (atomic_read(&smp_commenced) != 0) {
                parms.start_ctl = cr;
                parms.end_ctl = cr;
                parms.orvals[cr] = 1 << bit;
                parms.andvals[cr] = 0xFFFFFFFF;
                smp_ext_call_sync_others(ec_set_ctl_masked, &parms);
        }
        __ctl_set_bit(cr, bit);
}
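
/*
 * Illustrative use only (the register and bit numbers here are made-up
 * examples, not taken from this file): code that wants, say, bit 17 of
 * control register 0 set on every processor would call
 *
 *      smp_ctl_set_bit(0, 17);
 *
 * Before smp_commenced is set only the calling cpu is updated via
 * __ctl_set_bit; afterwards the other cpus receive an ec_set_ctl_masked
 * external call as well.
 */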

/*
 * Clear a bit in a control register of all cpus
 */
void smp_ctl_clear_bit(int cr, int bit) {
        ec_creg_mask_parms parms;

        if (atomic_read(&smp_commenced) != 0) {
                parms.start_ctl = cr;
                parms.end_ctl = cr;
                parms.orvals[cr] = 0x00000000;
                parms.andvals[cr] = ~(1 << bit);
                smp_ext_call_sync_others(ec_set_ctl_masked, &parms);
        }
        __ctl_clear_bit(cr, bit);
}

/*
 * Let's check how many CPUs we have.
 */

void smp_count_cpus(void)
{
        int curr_cpu;

        __cpu_logical_map[0] = boot_cpu_addr;
        current->processor = 0;
        smp_num_cpus = 1;
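
        /*
         * Probe every possible cpu address (0..65535): the candidate address
         * is written into __cpu_logical_map and tested with a sigp sense;
         * addresses that answer "not operational" are skipped, the others are
         * assigned the next logical cpu number until max_cpus is reached.
         */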
        for (curr_cpu = 0;
             curr_cpu <= 65535 && smp_num_cpus < max_cpus; curr_cpu++) {
                if ((__u16) curr_cpu == boot_cpu_addr)
                        continue;
                __cpu_logical_map[smp_num_cpus] = (__u16) curr_cpu;
                if (signal_processor(smp_num_cpus, sigp_sense) ==
                    sigp_not_operational)
                        continue;
                smp_num_cpus++;
        }
        printk("Detected %d CPUs\n", (int) smp_num_cpus);
        printk("Boot cpu address %2X\n", boot_cpu_addr);
}

/*
 *      Activate a secondary processor.
 */
extern void init_100hz_timer(void);

int __init start_secondary(void *cpuvoid)
{
        /* Setup the cpu */
        cpu_init();
        /* Print info about this processor */
        print_cpu_info(&safe_get_cpu_lowcore(smp_processor_id()).cpu_data);
        /* Wait for completion of smp startup */
        while (!atomic_read(&smp_commenced))
                /* nothing */ ;
        /* init per CPU 100 hz timer */
        init_100hz_timer();
        /* cpu_idle will call schedule for us */
        return cpu_idle(NULL);
}

/*
 * The restart interrupt handler jumps to start_secondary directly
 * without the detour over initialize_secondary. We defined it here
 * so that the linker doesn't complain.
 */
void __init initialize_secondary(void)
{
}

static int __init fork_by_hand(void)
{
        struct pt_regs regs;
        /* don't care about the psw and regs settings since we'll never
           reschedule the forked task. */
        memset(&regs, 0, sizeof(regs));
        return do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0);
}

static void __init do_boot_cpu(int cpu)
{
        struct task_struct *idle;
        struct _lowcore *cpu_lowcore;

        /* We can't use kernel_thread since we must _avoid_ to reschedule
           the child. */
        if (fork_by_hand() < 0)
                panic("failed fork for CPU %d", cpu);

        /*
         * We remove it from the pidhash and the runqueue
         * once we got the process:
         */
        idle = init_task.prev_task;
        if (!idle)
                panic("No idle process for CPU %d", cpu);
        idle->processor = cpu;
        idle->has_cpu = 1; /* we schedule the first task manually */

        del_from_runqueue(idle);
        unhash_process(idle);
        init_tasks[cpu] = idle;

        cpu_lowcore = &get_cpu_lowcore(cpu);
        cpu_lowcore->kernel_stack = idle->thread.ksp;
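
        /*
         * Save the boot cpu's current control and access registers into the
         * save areas of the new cpu's lowcore; the secondary presumably loads
         * them from there once it is kicked with sigp restart below.
         */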
        __asm__ __volatile__("stctl 0,15,%0\n\t"
                             "stam  0,15,%1"
                             : "=m" (cpu_lowcore->cregs_save_area[0]),
                               "=m" (cpu_lowcore->access_regs_save_area[0])
                             : : "memory");

        eieio();
        signal_processor(cpu, sigp_restart);
}

/*
 *      Architecture specific routine called by the kernel just before init is
 *      fired off. This allows the BP to have everything in order [we hope].
 *      At the end of this all the APs will hit the system scheduling and off
 *      we go. Each AP will load the system gdt's and jump through the kernel
 *      init into idle(). At this point the scheduler will one day take over
 *      and give them jobs to do. smp_callin is a standard routine
 *      we use to track CPUs as they power up.
 */

void __init smp_commence(void)
{
        /*
         *      Lets the callins below out of their loop.
         */
        atomic_set(&smp_commenced, 1);
}

/*
 *      Cycle through the processors, sending sigp restart orders to boot each.
 */

void __init smp_boot_cpus(void)
{
        struct _lowcore *curr_lowcore;
        sigp_ccode ccode;
        int i;

        smp_count_cpus();
        memset(lowcore_ptr, 0, sizeof(lowcore_ptr));

        /*
         * Initialize the logical to physical CPU number mapping
         * and the per-CPU profiling counter/multiplier
         */

        for (i = 0; i < NR_CPUS; i++) {
                prof_counter[i] = 1;
                prof_old_multiplier[i] = 1;
                prof_multiplier[i] = 1;
        }

        print_cpu_info(&safe_get_cpu_lowcore(0).cpu_data);

        for (i = 0; i < smp_num_cpus; i++)
        {
                curr_lowcore = (struct _lowcore *)
                        __get_free_page(GFP_KERNEL|GFP_DMA);
                if (curr_lowcore == NULL) {
                        printk("smp_boot_cpus failed to allocate prefix memory\n");
                        break;
                }
                lowcore_ptr[i] = curr_lowcore;
                memcpy(curr_lowcore, &S390_lowcore, sizeof(struct _lowcore));
                /*
                 * Most of the parameters are set up when the cpu is
                 * started up.
                 */
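                /*
                 * Every cpu gets its own copy of the boot lowcore, and its
                 * prefix register is pointed at that copy: directly via
                 * set_prefix() for the cpu we are already running on, via the
                 * sigp set-prefix order for the others, which are then started
                 * through do_boot_cpu().
                 */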
                if (smp_processor_id() == i)
                        set_prefix((u32) curr_lowcore);
                else {
                        ccode = signal_processor_p((u32)(curr_lowcore),
                                                   i, sigp_set_prefix);
                        if (ccode) {
                                /* if this gets troublesome I'll have to do
                                 * something about it. */
                                printk("ccode %d for cpu %d returned when "
                                       "setting prefix in smp_boot_cpus not good.\n",
                                       (int) ccode, (int) i);
                        }
                        else
                                do_boot_cpu(i);
                }
        }
}

/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
 *
 * usually you want to run this on all CPUs ;)
 */
int setup_profiling_timer(unsigned int multiplier)
{
        return 0;
}

/*
 * Local timer interrupt handler. It does both profiling and
 * process statistics/rescheduling.
 *
 * We do profiling in every local tick, statistics/rescheduling
 * happen only every 'profiling multiplier' ticks. The default
 * multiplier is 1 and it can be changed by writing the new multiplier
 * value into /proc/profile.
 */

void smp_local_timer_interrupt(struct pt_regs *regs)
{
        int user = (user_mode(regs) != 0);
        int cpu = smp_processor_id();

        /*
         * The profiling function is SMP safe. (nothing can mess
         * around with "current", and the profiling counters are
         * updated with atomic operations). This is especially
         * useful with a profiling multiplier != 1
         */
        if (!user_mode(regs))
                s390_do_profile(regs->psw.addr);

        if (!--prof_counter[cpu]) {
                int system = 1 - user;
                struct task_struct *p = current;
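
                /*
                 * Each tick that reaches this point is charged in full to
                 * exactly one bucket: 'user' is 1 if the tick hit user mode
                 * and 0 otherwise, and 'system' is its complement, so the
                 * kstat updates below account every tick once.
                 */
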
                /*
                 * The multiplier may have changed since the last time we got
                 * to this point as a result of the user writing to
                 * /proc/profile. In this case we need to adjust the APIC
                 * timer accordingly.
                 *
                 * Interrupts are already masked off at this point.
                 */
                prof_counter[cpu] = prof_multiplier[cpu];
                if (prof_counter[cpu] != prof_old_multiplier[cpu]) {
                        /* FIXME setup_APIC_timer(calibration_result/prof_counter[cpu]); */
                        prof_old_multiplier[cpu] = prof_counter[cpu];
                }

                /*
                 * After doing the above, we need to make like
                 * a normal interrupt - otherwise timer interrupts
                 * ignore the global interrupt lock, which is the
                 * WrongThing (tm) to do.
                 */

                irq_enter(cpu, 0);
                update_one_process(p, 1, user, system, cpu);
                if (p->pid) {
                        p->counter -= 1;
                        if (p->counter <= 0) {
                                p->counter = 0;
                                p->need_resched = 1;
                        }
                        if (p->nice > 0) {
                                kstat.cpu_nice += user;
                                kstat.per_cpu_nice[cpu] += user;
                        } else {
                                kstat.cpu_user += user;
                                kstat.per_cpu_user[cpu] += user;
                        }
                        kstat.cpu_system += system;
                        kstat.per_cpu_system[cpu] += system;
                }
                irq_exit(cpu, 0);
        }
}