/*
 *  arch/s390/kernel/smp.c
 *
 *  Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
 *  Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
 *             Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  based on other smp stuff by
 *  (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
 *  (c) 1998 Ingo Molnar
 *
 * We work with logical cpu numbering everywhere we can. The only
 * functions using the real cpu address (got from STAP) are the sigp
 * functions. For all other functions we use the identity mapping.
 * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is
 * used e.g. to find the idle task belonging to a logical cpu. Every array
 * in the kernel is sorted by the logical cpu number and not by the physical
 * one which is causing all the confusion with __cpu_logical_map and
 * cpu_number_map in other architectures.
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/delay.h>

#include <asm/sigp.h>
#include <asm/pgalloc.h>
extern void update_one_process(struct task_struct *p, unsigned long ticks,
                               unsigned long user, unsigned long system,
                               int cpu);
extern int cpu_idle(void * unused);

extern __u16 boot_cpu_addr;
/*
 * An array with a pointer to the lowcore of every CPU.
 */
static int       max_cpus = NR_CPUS;         /* Setup configured maximum number of CPUs to activate */
int              smp_num_cpus;
struct _lowcore *lowcore_ptr[NR_CPUS];
unsigned int     prof_multiplier[NR_CPUS];
unsigned int     prof_old_multiplier[NR_CPUS];
unsigned int     prof_counter[NR_CPUS];
volatile int     __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
cycles_t         cacheflush_time = 0;
int              smp_threads_ready = 0;      /* Set when the idlers are all forked. */
unsigned long    ipi_count = 0;              /* Number of IPIs delivered. */
static atomic_t  smp_commenced = ATOMIC_INIT(0);

spinlock_t       kernel_flag = SPIN_LOCK_UNLOCKED;
/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */
static int __init nosmp(char *str)
{
        max_cpus = 0;
        return 1;
}

__setup("nosmp", nosmp);
static int __init maxcpus(char *str)
{
        get_option(&str, &max_cpus);
        return 1;
}

__setup("maxcpus=", maxcpus);
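/*
 * For illustration: booting with "maxcpus=2" on the kernel command
 * line limits activation to two CPUs, while "nosmp" (or "maxcpus=0")
 * brings up the boot cpu only.
 */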
/*
 * Reboot, halt and power_off routines for SMP.
 */
extern char vmhalt_cmd[];
extern char vmpoff_cmd[];

extern void reipl(unsigned long devno);
void do_machine_restart(void)
{
        smp_send_stop();
        reipl(S390_lowcore.ipl_device);
}
void machine_restart(char * __unused)
{
        if (smp_processor_id() != 0) {
                /* if we aren't cpu 0, ask cpu 0 to do the restart */
                smp_ext_call_async(0, ec_restart);
                for (;;);
        } else
                do_machine_restart();
}
void do_machine_halt(void)
{
        smp_send_stop();
        if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0)
                cpcmd(vmhalt_cmd, NULL, 0);
        disabled_wait(0);
}
void machine_halt(void)
{
        if (smp_processor_id() != 0) {
                smp_ext_call_async(0, ec_halt);
                for (;;);
        } else
                do_machine_halt();
}
void do_machine_power_off(void)
{
        smp_send_stop();
        if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0)
                cpcmd(vmpoff_cmd, NULL, 0);
        disabled_wait(0);
}
void machine_power_off(void)
{
        if (smp_processor_id() != 0) {
                smp_ext_call_async(0, ec_power_off);
                for (;;);
        } else
                do_machine_power_off();
}
/*
 * This is the main routine where commands issued by other
 * cpus are handled.
 */
void do_ext_call_interrupt(__u16 source_cpu_addr)
{
        ec_ext_call *ec, *next;
        int bits;

        /*
         * handle bit signal external calls
         *
         * For the ec_schedule signal we have to do nothing. All the work
         * is done automatically when we return from the interrupt.
         * For the ec_restart, ec_halt and ec_power_off we call the
         * appropriate routine.
         */
        do {
                bits = atomic_read(&S390_lowcore.ext_call_fast);
        } while (atomic_compare_and_swap(bits, 0, &S390_lowcore.ext_call_fast));

        if (test_bit(ec_restart, &bits))
                do_machine_restart();
        if (test_bit(ec_halt, &bits))
                do_machine_halt();
        if (test_bit(ec_power_off, &bits))
                do_machine_power_off();

        /*
         * Handle external call commands with a parameter area
         */
        do {
                ec = (ec_ext_call *) atomic_read(&S390_lowcore.ext_call_queue);
        } while (atomic_compare_and_swap((int) ec, 0,
                                         &S390_lowcore.ext_call_queue));
        if (ec == NULL)
                return;         /* no command signals */

        /* Make a fifo out of the lifo */
        next = ec;
        ec = NULL;
        while (next != NULL) {
                ec_ext_call *tmp = next->next;
                next->next = ec;
                ec = next;
                next = tmp;
        }

        /* Execute every sigp command on the queue */
        while (ec != NULL) {
                next = ec->next;
                switch (ec->cmd) {
                case ec_get_ctl: {
                        ec_creg_parms *pp;
                        pp = (ec_creg_parms *) ec->parms;
                        atomic_set(&ec->status, ec_executing);
                        /* store the requested control register range */
                        __asm__ __volatile__ (
                                "   bras  1,0f\n"
                                "   stctl 0,0,0(%0)\n"
                                "0: ex    %1,0(1)\n"
                                : : "a" (pp->cregs+pp->start_ctl),
                                    "a" ((pp->start_ctl<<4) + pp->end_ctl)
                                : "memory", "1" );
                        atomic_set(&ec->status, ec_done);
                        break;
                }
                case ec_set_ctl: {
                        ec_creg_parms *pp;
                        pp = (ec_creg_parms *) ec->parms;
                        atomic_set(&ec->status, ec_executing);
                        /* load the requested control register range */
                        __asm__ __volatile__ (
                                "   bras  1,0f\n"
                                "   lctl 0,0,0(%0)\n"
                                "0: ex    %1,0(1)\n"
                                : : "a" (pp->cregs+pp->start_ctl),
                                    "a" ((pp->start_ctl<<4) + pp->end_ctl)
                                : "memory", "1" );
                        atomic_set(&ec->status, ec_done);
                        break;
                }
                case ec_set_ctl_masked: {
                        ec_creg_mask_parms *pp;
                        u32 cregs[16];
                        int i;

                        pp = (ec_creg_mask_parms *) ec->parms;
                        atomic_set(&ec->status, ec_executing);
                        /* read-modify-write: store the control registers,
                           apply the and/or masks, then load them back */
                        __asm__ __volatile__ (
                                "   bras  1,0f\n"
                                "   stctl 0,0,0(%0)\n"
                                "0: ex    %1,0(1)\n"
                                : : "a" (cregs+pp->start_ctl),
                                    "a" ((pp->start_ctl<<4) + pp->end_ctl)
                                : "memory", "1" );
                        for (i = pp->start_ctl; i <= pp->end_ctl; i++)
                                cregs[i] = (cregs[i] & pp->andvals[i])
                                                | pp->orvals[i];
                        __asm__ __volatile__ (
                                "   bras  1,0f\n"
                                "   lctl 0,0,0(%0)\n"
                                "0: ex    %1,0(1)\n"
                                : : "a" (cregs+pp->start_ctl),
                                    "a" ((pp->start_ctl<<4) + pp->end_ctl)
                                : "memory", "1" );
                        atomic_set(&ec->status, ec_done);
                        break;
                }
                default:
                        break;
                }
                ec = next;
        }
}
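/*
 * Note on the control register assembly above: stctl and lctl encode
 * the first and the last control register of the range in two 4-bit
 * fields of the instruction. The out-of-line stctl/lctl is assembled
 * with range 0,0 and "ex" ORs (start_ctl<<4) + end_ctl into that byte
 * while executing it, selecting the register range at run time.
 */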
/*
 * Send an external call sigp to another cpu and wait for its completion.
 */
sigp_ccode smp_ext_call_sync(int cpu, ec_cmd_sig cmd, void *parms)
{
        struct _lowcore *lowcore = &get_cpu_lowcore(cpu);
        sigp_ccode ccode;
        ec_ext_call ec;

        ec.cmd = cmd;
        atomic_set(&ec.status, ec_pending);
        ec.parms = parms;
        do {
                ec.next = (ec_ext_call *) atomic_read(&lowcore->ext_call_queue);
        } while (atomic_compare_and_swap((int) ec.next, (int)(&ec),
                                         &lowcore->ext_call_queue));
        /*
         * We try once to deliver the signal. There are four possible
         * condition codes:
         * 0) Order code accepted - can't show up on an external call
         * 1) Status stored - fine, wait for completion.
         * 2) Busy - there is another signal pending. That's fine too, because
         *    do_ext_call from the pending signal will execute all signals on
         *    the queue. We wait for completion.
         * 3) Not operational - something very bad has happened to the cpu.
         *    do not wait for completion.
         */
        ccode = signal_processor(cpu, sigp_external_call);

        if (ccode != sigp_not_operational)
                /* wait for completion, FIXME: possible seed of a deadlock */
                while (atomic_read(&ec.status) != ec_done);

        return ccode;
}
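/*
 * Hypothetical usage sketch (the names follow the parameter blocks
 * used above, but this call does not appear in the original file):
 * reading the control registers of cpu 2 could look like
 *
 *      ec_creg_parms parms;
 *      parms.start_ctl = 0;
 *      parms.end_ctl = 15;
 *      smp_ext_call_sync(2, ec_get_ctl, &parms);
 *
 * On return, the buffer behind parms.cregs holds the values stored by
 * the ec_get_ctl handler in do_ext_call_interrupt.
 */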
/*
 * Send an external call sigp to another cpu and return without waiting
 * for its completion. Currently we do not support parameters with
 * asynchronous sigps.
 */
sigp_ccode smp_ext_call_async(int cpu, ec_bit_sig sig)
{
        struct _lowcore *lowcore = &get_cpu_lowcore(cpu);
        sigp_ccode ccode;

        /*
         * Set signaling bit in lowcore of target cpu and kick it
         */
        atomic_set_mask(1<<sig, &lowcore->ext_call_fast);
        ccode = signal_processor(cpu, sigp_external_call);

        return ccode;
}
/*
 * Send an external call sigp to every other cpu in the system and
 * wait for the completion of the sigps.
 */
void smp_ext_call_sync_others(ec_cmd_sig cmd, void *parms)
{
        struct _lowcore *lowcore;
        ec_ext_call ec[NR_CPUS];
        sigp_ccode ccode;
        int i;

        for (i = 0; i < smp_num_cpus; i++) {
                if (smp_processor_id() == i)
                        continue;
                lowcore = &get_cpu_lowcore(i);
                ec[i].cmd = cmd;
                atomic_set(&ec[i].status, ec_pending);
                ec[i].parms = parms;
                do {
                        ec[i].next = (ec_ext_call *)
                                atomic_read(&lowcore->ext_call_queue);
                } while (atomic_compare_and_swap((int) ec[i].next, (int)(ec+i),
                                                 &lowcore->ext_call_queue));
                ccode = signal_processor(i, sigp_external_call);
        }

        /* wait for completion, FIXME: possible seed of a deadlock */
        for (i = 0; i < smp_num_cpus; i++) {
                if (smp_processor_id() == i)
                        continue;
                while (atomic_read(&ec[i].status) != ec_done);
        }
}
/*
 * Send an external call sigp to every other cpu in the system and
 * return without waiting for the completion of the sigps. Currently
 * we do not support parameters with asynchronous sigps.
 */
void smp_ext_call_async_others(ec_bit_sig sig)
{
        struct _lowcore *lowcore;
        sigp_ccode ccode;
        int i;

        for (i = 0; i < smp_num_cpus; i++) {
                if (smp_processor_id() == i)
                        continue;
                lowcore = &get_cpu_lowcore(i);
                /*
                 * Set signaling bit in lowcore of target cpu and kick it
                 */
                atomic_set_mask(1<<sig, &lowcore->ext_call_fast);
                ccode = signal_processor(i, sigp_external_call);
        }
}
/*
 * cycles through all the cpus,
 * returns early if info is not NULL & the processor has something
 * of interest to report in the info structure.
 * it returns the next cpu to check if it returns early.
 * i.e. it should be used as follows if you wish to receive info:
 *
 * do {
 *      info->cpu = next_cpu;
 *      next_cpu = smp_signal_others(order_code, parameter, 1, info);
 *      ... check info here
 * } while (next_cpu <= smp_num_cpus)
 *
 * if you are lazy just use it like
 * smp_signal_others(order_code, parameter, 1, NULL);
 */
int smp_signal_others(sigp_order_code order_code, u32 parameter,
                      int spin, sigp_info *info)
{
        sigp_ccode ccode;
        u32 dummy;
        u16 i;

        if (info)
                info->intresting = 0;
        for (i = (info ? info->cpu : 0); i < smp_num_cpus; i++) {
                if (smp_processor_id() != i) {
                        do {
                                ccode = signal_processor_ps(
                                        (info ? &info->status : &dummy),
                                        parameter, i, order_code);
                        } while (spin && ccode == sigp_busy);
                        if (info && ccode != sigp_order_code_accepted) {
                                info->intresting = 1;
                                info->cpu = i;
                                info->ccode = ccode;
                                i++;
                                break;
                        }
                }
        }
        return i;
}
/*
 * this function sends a 'stop' sigp to all other CPUs in the system.
 * it goes straight through.
 */
void smp_send_stop(void)
{
        smp_signal_others(sigp_stop, 0, 1, NULL);
}
/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
void smp_send_reschedule(int cpu)
{
        smp_ext_call_async(cpu, ec_schedule);
}
/*
 * Set a bit in a control register of all cpus
 */
void smp_ctl_set_bit(int cr, int bit)
{
        ec_creg_mask_parms parms;

        if (atomic_read(&smp_commenced) != 0) {
                parms.start_ctl = cr;
                parms.end_ctl = cr;
                parms.orvals[cr] = 1 << bit;
                parms.andvals[cr] = 0xFFFFFFFF;
                smp_ext_call_sync_others(ec_set_ctl_masked, &parms);
        }
        __ctl_set_bit(cr, bit);
}
/*
 * Clear a bit in a control register of all cpus
 */
void smp_ctl_clear_bit(int cr, int bit)
{
        ec_creg_mask_parms parms;

        if (atomic_read(&smp_commenced) != 0) {
                parms.start_ctl = cr;
                parms.end_ctl = cr;
                parms.orvals[cr] = 0x00000000;
                parms.andvals[cr] = ~(1 << bit);
                smp_ext_call_sync_others(ec_set_ctl_masked, &parms);
        }
        __ctl_clear_bit(cr, bit);
}
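/*
 * Usage example (illustrative, not taken from the original file): to
 * set bit 7 of control register 0 on every cpu a subsystem would call
 *
 *      smp_ctl_set_bit(0, 7);
 *
 * and undo it with smp_ctl_clear_bit(0, 7). Until smp_commenced is
 * set only the local __ctl_set_bit/__ctl_clear_bit path runs;
 * afterwards the other cpus are updated first via ec_set_ctl_masked.
 */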
/*
 * Let's check how many CPUs we have.
 */
void smp_count_cpus(void)
{
        int curr_cpu;

        __cpu_logical_map[0] = boot_cpu_addr;
        current->processor = 0;
        smp_num_cpus = 1;
        for (curr_cpu = 0;
             curr_cpu <= 65535 && smp_num_cpus < max_cpus; curr_cpu++) {
                if ((__u16) curr_cpu == boot_cpu_addr)
                        continue;
                __cpu_logical_map[smp_num_cpus] = (__u16) curr_cpu;
                if (signal_processor(smp_num_cpus, sigp_sense) ==
                    sigp_not_operational)
                        continue;
                smp_num_cpus++;
        }
        printk("Detected %d CPUs\n", (int) smp_num_cpus);
        printk("Boot cpu address %2X\n", boot_cpu_addr);
}
/*
 * Activate a secondary processor.
 */
extern void init_100hz_timer(void);

int __init start_secondary(void *cpuvoid)
{
        /* Setup the cpu */
        cpu_init();
        /* Print info about this processor */
        print_cpu_info(&safe_get_cpu_lowcore(smp_processor_id()).cpu_data);
        /* Wait for completion of smp startup */
        while (!atomic_read(&smp_commenced))
                /* nothing */ ;
        /* init per CPU 100 hz timer */
        init_100hz_timer();
        /* cpu_idle will call schedule for us */
        return cpu_idle(NULL);
}
/*
 * The restart interrupt handler jumps to start_secondary directly
 * without the detour over initialize_secondary. We defined it here
 * so that the linker doesn't complain.
 */
void __init initialize_secondary(void)
{
}
static int __init fork_by_hand(void)
{
        struct pt_regs regs;
        /* don't care about the psw and regs settings since we'll never
           reschedule the forked task. */
        memset(&regs, 0, sizeof(pt_regs));
        return do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0);
}
static void __init do_boot_cpu(int cpu)
{
        struct task_struct *idle;
        struct _lowcore    *cpu_lowcore;

        /* We can't use kernel_thread since we must _avoid_ rescheduling
           the child. */
        if (fork_by_hand() < 0)
                panic("failed fork for CPU %d", cpu);

        /*
         * We remove it from the pidhash and the runqueue
         * once we got the process:
         */
        idle = init_task.prev_task;
        if (!idle)
                panic("No idle process for CPU %d", cpu);
        idle->processor = cpu;
        idle->has_cpu = 1; /* we schedule the first task manually */

        del_from_runqueue(idle);
        unhash_process(idle);
        init_tasks[cpu] = idle;

        cpu_lowcore = &get_cpu_lowcore(cpu);
        cpu_lowcore->kernel_stack = idle->thread.ksp;
        /* save the boot cpu's control and access registers in the new
           lowcore; the secondary picks them up when it starts */
        __asm__ __volatile__("stctl 0,15,%0\n\t"
                             "stam  0,15,%1"
                             : "=m" (cpu_lowcore->cregs_save_area[0]),
                               "=m" (cpu_lowcore->access_regs_save_area[0])
                             : : "memory");

        signal_processor(cpu, sigp_restart);
}
/*
 * Architecture specific routine called by the kernel just before init is
 * fired off. This allows the BP to have everything in order [we hope].
 * At the end of this all the APs will hit the system scheduling and off
 * we go. Each AP will load the system gdt's and jump through the kernel
 * init into idle(). At this point the scheduler will one day take over
 * and give them jobs to do. smp_callin is a standard routine
 * we use to track CPUs as they power up.
 */
void __init smp_commence(void)
{
        /*
         * Lets the callins below out of their loop.
         */
        atomic_set(&smp_commenced, 1);
}
/*
 * Cycle through the processors, sending sigp restarts to boot each.
 */
void __init smp_boot_cpus(void)
{
        struct _lowcore *curr_lowcore;
        sigp_ccode ccode;
        int i;

        smp_count_cpus();
        memset(lowcore_ptr, 0, sizeof(lowcore_ptr));

        /*
         * Initialize the logical to physical CPU number mapping
         * and the per-CPU profiling counter/multiplier
         */
        for (i = 0; i < NR_CPUS; i++) {
                prof_counter[i] = 1;
                prof_old_multiplier[i] = 1;
                prof_multiplier[i] = 1;
        }

        print_cpu_info(&safe_get_cpu_lowcore(0).cpu_data);

        for (i = 0; i < smp_num_cpus; i++) {
                curr_lowcore = (struct _lowcore *)
                        __get_free_page(GFP_KERNEL|GFP_DMA);
                if (curr_lowcore == NULL) {
                        printk("smp_boot_cpus failed to allocate prefix memory\n");
                        break;
                }
                lowcore_ptr[i] = curr_lowcore;
                memcpy(curr_lowcore, &S390_lowcore, sizeof(struct _lowcore));
                /*
                 * Most of the parameters are set up when the cpu is
                 * started up.
                 */
                if (smp_processor_id() == i)
                        set_prefix((u32) curr_lowcore);
                else {
                        ccode = signal_processor_p((u32)(curr_lowcore),
                                                   i, sigp_set_prefix);
                        if (ccode) {
                                /* if this gets troublesome I'll have to do
                                 * something about it. */
                                printk("ccode %d for cpu %d returned when "
                                       "setting prefix in smp_boot_cpus not good.\n",
                                       (int) ccode, (int) i);
                        } else
                                do_boot_cpu(i);
                }
        }
}
/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
 *
 * usually you want to run this on all CPUs ;)
 */
int setup_profiling_timer(unsigned int multiplier)
{
        return 0;
}
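/*
 * A hypothetical full implementation (sketch only; it mirrors what the
 * comment above describes and what the prof_multiplier[] machinery
 * below expects) would validate the value and fan it out:
 *
 *      if (multiplier < 1 || multiplier > 20)
 *              return -EINVAL;
 *      for (i = 0; i < NR_CPUS; i++)
 *              prof_multiplier[i] = multiplier;
 *      return 0;
 */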
/*
 * Local timer interrupt handler. It does both profiling and
 * process statistics/rescheduling.
 *
 * We do profiling in every local tick, statistics/rescheduling
 * happen only every 'profiling multiplier' ticks. The default
 * multiplier is 1 and it can be changed by writing the new multiplier
 * value into /proc/profile.
 */
void smp_local_timer_interrupt(struct pt_regs * regs)
{
        int user = (user_mode(regs) != 0);
        int cpu = smp_processor_id();

        /*
         * The profiling function is SMP safe. (nothing can mess
         * around with "current", and the profiling counters are
         * updated with atomic operations). This is especially
         * useful with a profiling multiplier != 1
         */
        if (!user_mode(regs))
                s390_do_profile(regs->psw.addr);

        if (!--prof_counter[cpu]) {
                int system = 1 - user;
                struct task_struct * p = current;

                /*
                 * The multiplier may have changed since the last time we got
                 * to this point as a result of the user writing to
                 * /proc/profile. In this case we need to adjust the APIC
                 * timer accordingly.
                 *
                 * Interrupts are already masked off at this point.
                 */
                prof_counter[cpu] = prof_multiplier[cpu];
                if (prof_counter[cpu] != prof_old_multiplier[cpu]) {
                        /* FIXME setup_APIC_timer(calibration_result /
                           prof_counter[cpu]); */
                        prof_old_multiplier[cpu] = prof_counter[cpu];
                }

                /*
                 * After doing the above, we need to make like
                 * a normal interrupt - otherwise timer interrupts
                 * ignore the global interrupt lock, which is the
                 * WrongThing (tm) to do.
                 */
                irq_enter(cpu, 0);
                update_one_process(p, 1, user, system, cpu);
                if (p->pid) {
                        p->counter -= 1;
                        if (p->counter <= 0) {
                                p->counter = 0;
                                p->need_resched = 1;
                        }
                        if (p->nice > 0) {
                                kstat.cpu_nice += user;
                                kstat.per_cpu_nice[cpu] += user;
                        } else {
                                kstat.cpu_user += user;
                                kstat.per_cpu_user[cpu] += user;
                        }
                        kstat.cpu_system += system;
                        kstat.per_cpu_system[cpu] += system;
                }
                irq_exit(cpu, 0);
        }
}
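/*
 * Worked example: with prof_multiplier[cpu] == 4, the profiling hook
 * above still runs on every tick, but prof_counter[cpu] only reaches
 * zero on every fourth tick, so update_one_process() and the kstat
 * accounting in smp_local_timer_interrupt() run at a quarter of the
 * tick rate.
 */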