1971 i86 kernel should be more careful when casting pointers
[unleashed.git] / usr / src / uts / i86pc / os / mp_pc.c
blobe2dbce2091081c42cb10c8e5c3781fe681470cf9
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
29 * Copyright 2011 Joyent, Inc. All rights reserved.
33 * Welcome to the world of the "real mode platter".
34 * See also startup.c, mpcore.s and apic.c for related routines.
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/cpuvar.h>
40 #include <sys/cpu_module.h>
41 #include <sys/kmem.h>
42 #include <sys/archsystm.h>
43 #include <sys/machsystm.h>
44 #include <sys/controlregs.h>
45 #include <sys/x86_archext.h>
46 #include <sys/smp_impldefs.h>
47 #include <sys/sysmacros.h>
48 #include <sys/mach_mmu.h>
49 #include <sys/promif.h>
50 #include <sys/cpu.h>
51 #include <sys/cpu_event.h>
52 #include <sys/sunndi.h>
53 #include <sys/fs/dv_node.h>
54 #include <vm/hat_i86.h>
55 #include <vm/as.h>
/* CPU set polled by mp_cpu_poweron() while waiting for a CPU to become READY. */
extern cpuset_t cpu_ready_set;

extern int mp_start_cpu_common(cpu_t *cp, boolean_t boot);

/*
 * Start/end symbols of the real mode code fragments. The code in this
 * file only uses their addresses: the bytes between each start symbol and
 * its matching _end symbol are copied into the real mode platter.
 */
extern void real_mode_start_cpu(void);
extern void real_mode_start_cpu_end(void);
extern void real_mode_stop_cpu_stage1(void);
extern void real_mode_stop_cpu_stage1_end(void);
extern void real_mode_stop_cpu_stage2(void);
extern void real_mode_stop_cpu_stage2_end(void);
extern void *(*cpu_pause_func)(void *);

void rmp_gdt_init(rm_platter_t *);
71 * Fill up the real mode platter to make it easy for real mode code to
72 * kick it off. This area should really be one passed by boot to kernel
73 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
74 * have identical physical and virtual address in paged mode.
76 static ushort_t *warm_reset_vector = NULL;
78 int
79 mach_cpucontext_init(void)
81 ushort_t *vec;
82 ulong_t addr;
83 struct rm_platter *rm = (struct rm_platter *)rm_platter_va;
85 if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
86 sizeof (vec), PROT_READ | PROT_WRITE)))
87 return (-1);
90 * setup secondary cpu bios boot up vector
91 * Write page offset to 0x467 and page frame number to 0x469.
93 addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
94 vec[0] = (ushort_t)(addr & PAGEOFFSET);
95 vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
96 warm_reset_vector = vec;
98 /* Map real mode platter into kas so kernel can access it. */
99 hat_devload(kas.a_hat,
100 (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
101 btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
102 HAT_LOAD_NOCONSIST);
104 /* Copy CPU startup code to rm_platter if it's still during boot. */
105 if (!plat_dr_enabled()) {
106 ASSERT((size_t)real_mode_start_cpu_end -
107 (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
108 bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
109 (size_t)real_mode_start_cpu_end -
110 (size_t)real_mode_start_cpu);
113 return (0);
116 void
117 mach_cpucontext_fini(void)
119 if (warm_reset_vector)
120 psm_unmap_phys((caddr_t)warm_reset_vector,
121 sizeof (warm_reset_vector));
122 hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
123 HAT_UNLOAD);
126 #if defined(__amd64)
127 extern void *long_mode_64(void);
128 #endif /* __amd64 */
130 /*ARGSUSED*/
131 void
132 rmp_gdt_init(rm_platter_t *rm)
135 #if defined(__amd64)
136 /* Use the kas address space for the CPU startup thread. */
137 if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
138 panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
139 "located above 4G in physical memory (@ 0x%lx)",
140 MAKECR3(kas.a_hat->hat_htable->ht_pfn));
143 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
144 * by code in real_mode_start_cpu():
146 * GDT[0]: NULL selector
147 * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
149 * Clear the IDT as interrupts will be off and a limit of 0 will cause
150 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
151 * a course of action as any other, though it may cause the entire
152 * platform to reset in some cases...
154 rm->rm_temp_gdt[0] = 0;
155 rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
157 rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
158 rm->rm_temp_gdt_base = rm_platter_pa +
159 (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
160 rm->rm_temp_idt_lim = 0;
161 rm->rm_temp_idt_base = 0;
164 * Since the CPU needs to jump to protected mode using an identity
165 * mapped address, we need to calculate it here.
167 rm->rm_longmode64_addr = rm_platter_pa +
168 (uint32_t)((uintptr_t)long_mode_64 -
169 (uintptr_t)real_mode_start_cpu);
170 #endif /* __amd64 */
173 static void *
174 mach_cpucontext_alloc_tables(struct cpu *cp)
176 tss_t *ntss;
177 struct cpu_tables *ct;
180 * Allocate space for stack, tss, gdt and idt. We round the size
181 * allotted for cpu_tables up, so that the TSS is on a unique page.
182 * This is more efficient when running in virtual machines.
184 ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
185 if ((uintptr_t)ct & PAGEOFFSET)
186 panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
187 cp->cpu_id);
189 ntss = cp->cpu_tss = &ct->ct_tss;
191 #if defined(__amd64)
194 * #DF (double fault).
196 ntss->tss_ist1 = (uint64_t)&ct->ct_stack[sizeof (ct->ct_stack)];
198 #elif defined(__i386)
200 ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
201 (uint32_t)&ct->ct_stack[sizeof (ct->ct_stack)];
203 ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
205 ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;
207 ntss->tss_cs = KCS_SEL;
208 ntss->tss_ds = ntss->tss_es = KDS_SEL;
209 ntss->tss_fs = KFS_SEL;
210 ntss->tss_gs = KGS_SEL;
212 #endif /* __i386 */
215 * Set I/O bit map offset equal to size of TSS segment limit
216 * for no I/O permission map. This will cause all user I/O
217 * instructions to generate #gp fault.
219 ntss->tss_bitmapbase = sizeof (*ntss);
222 * Setup kernel tss.
224 set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
225 sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);
227 return (ct);
230 void *
231 mach_cpucontext_xalloc(struct cpu *cp, int optype)
233 size_t len;
234 struct cpu_tables *ct;
235 rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
236 static int cpu_halt_code_ready;
238 if (optype == MACH_CPUCONTEXT_OP_STOP) {
239 ASSERT(plat_dr_enabled());
242 * The WARM_RESET_VECTOR has a limitation that the physical
243 * address written to it must be page-aligned. To work around
244 * this limitation, the CPU stop code has been splitted into
245 * two stages.
246 * The stage 2 code, which implements the real logic to halt
247 * CPUs, is copied to the rm_cpu_halt_code field in the real
248 * mode platter. The stage 1 code, which simply jumps to the
249 * stage 2 code in the rm_cpu_halt_code field, is copied to
250 * rm_code field in the real mode platter and it may be
251 * overwritten after the CPU has been stopped.
253 if (!cpu_halt_code_ready) {
255 * The rm_cpu_halt_code field in the real mode platter
256 * is used by the CPU stop code only. So only copy the
257 * CPU stop stage 2 code into the rm_cpu_halt_code
258 * field on the first call.
260 len = (size_t)real_mode_stop_cpu_stage2_end -
261 (size_t)real_mode_stop_cpu_stage2;
262 ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
263 bcopy((caddr_t)real_mode_stop_cpu_stage2,
264 (caddr_t)rm->rm_cpu_halt_code, len);
265 cpu_halt_code_ready = 1;
269 * The rm_code field in the real mode platter is shared by
270 * the CPU start, CPU stop, CPR and fast reboot code. So copy
271 * the CPU stop stage 1 code into the rm_code field every time.
273 len = (size_t)real_mode_stop_cpu_stage1_end -
274 (size_t)real_mode_stop_cpu_stage1;
275 ASSERT(len <= RM_PLATTER_CODE_SIZE);
276 bcopy((caddr_t)real_mode_stop_cpu_stage1,
277 (caddr_t)rm->rm_code, len);
278 rm->rm_cpu_halted = 0;
280 return (cp->cpu_m.mcpu_mach_ctx_ptr);
281 } else if (optype != MACH_CPUCONTEXT_OP_START) {
282 return (NULL);
286 * Only need to allocate tables when starting CPU.
287 * Tables allocated when starting CPU will be reused when stopping CPU.
289 ct = mach_cpucontext_alloc_tables(cp);
290 if (ct == NULL) {
291 return (NULL);
294 /* Copy CPU startup code to rm_platter for CPU hot-add operations. */
295 if (plat_dr_enabled()) {
296 bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
297 (size_t)real_mode_start_cpu_end -
298 (size_t)real_mode_start_cpu);
302 * Now copy all that we've set up onto the real mode platter
303 * for the real mode code to digest as part of starting the cpu.
305 rm->rm_idt_base = cp->cpu_idt;
306 rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
307 rm->rm_gdt_base = cp->cpu_gdt;
308 rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
311 * CPU needs to access kernel address space after powering on.
312 * When hot-adding CPU at runtime, directly use top level page table
313 * of kas other than the return value of getcr3(). getcr3() returns
314 * current process's top level page table, which may be different from
315 * the one of kas.
317 rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
318 rm->rm_cpu = cp->cpu_id;
321 * For hot-adding CPU at runtime, Machine Check and Performance Counter
322 * should be disabled. They will be enabled on demand after CPU powers
323 * on successfully
325 rm->rm_cr4 = getcr4();
326 rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);
328 rmp_gdt_init(rm);
330 return (ct);
333 void
334 mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
336 struct cpu_tables *ct = arg;
338 ASSERT(&ct->ct_tss == cp->cpu_tss);
339 if (optype == MACH_CPUCONTEXT_OP_START) {
340 switch (err) {
341 case 0:
343 * Save pointer for reuse when stopping CPU.
345 cp->cpu_m.mcpu_mach_ctx_ptr = arg;
346 break;
347 case ETIMEDOUT:
349 * The processor was poked, but failed to start before
350 * we gave up waiting for it. In case it starts later,
351 * don't free anything.
353 cp->cpu_m.mcpu_mach_ctx_ptr = arg;
354 break;
355 default:
357 * Some other, passive, error occurred.
359 kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
360 cp->cpu_tss = NULL;
361 break;
363 } else if (optype == MACH_CPUCONTEXT_OP_STOP) {
364 switch (err) {
365 case 0:
367 * Free resources allocated when starting CPU.
369 kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
370 cp->cpu_tss = NULL;
371 cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
372 break;
373 default:
375 * Don't touch table pointer in case of failure.
377 break;
379 } else {
380 ASSERT(0);
384 void *
385 mach_cpucontext_alloc(struct cpu *cp)
387 return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
390 void
391 mach_cpucontext_free(struct cpu *cp, void *arg, int err)
393 mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
397 * "Enter monitor." Called via cross-call from stop_other_cpus().
399 void
400 mach_cpu_halt(char *msg)
402 if (msg)
403 prom_printf("%s\n", msg);
405 /*CONSTANTCONDITION*/
406 while (1)
/*
 * Idle the current CPU by calling i86_halt().
 */
void
mach_cpu_idle(void)
{
	i86_halt();
}
416 void
417 mach_cpu_pause(volatile char *safe)
420 * This cpu is now safe.
422 *safe = PAUSE_WAIT;
423 membar_enter(); /* make sure stores are flushed */
426 * Now we wait. When we are allowed to continue, safe
427 * will be set to PAUSE_IDLE.
429 while (*safe != PAUSE_IDLE)
430 SMT_PAUSE();
434 * Power on the target CPU.
437 mp_cpu_poweron(struct cpu *cp)
439 int error;
440 cpuset_t tempset;
441 processorid_t cpuid;
443 ASSERT(cp != NULL);
444 cpuid = cp->cpu_id;
445 if (use_mp == 0 || plat_dr_support_cpu() == 0) {
446 return (ENOTSUP);
447 } else if (cpuid < 0 || cpuid >= max_ncpus) {
448 return (EINVAL);
452 * The currrent x86 implementaiton of mp_cpu_configure() and
453 * mp_cpu_poweron() have a limitation that mp_cpu_poweron() could only
454 * be called once after calling mp_cpu_configure() for a specific CPU.
455 * It's because mp_cpu_poweron() will destroy data structure created
456 * by mp_cpu_configure(). So reject the request if the CPU has already
457 * been powered on once after calling mp_cpu_configure().
458 * This limitaiton only affects the p_online syscall and the DR driver
459 * won't be affected because the DR driver always invoke public CPU
460 * management interfaces in the predefined order:
461 * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
463 if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
464 return (ENOTSUP);
468 * Check if there's at least a Mbyte of kmem available
469 * before attempting to start the cpu.
471 if (kmem_avail() < 1024 * 1024) {
473 * Kick off a reap in case that helps us with
474 * later attempts ..
476 kmem_reap();
477 return (ENOMEM);
480 affinity_set(CPU->cpu_id);
483 * Start the target CPU. No need to call mach_cpucontext_fini()
484 * if mach_cpucontext_init() fails.
486 if ((error = mach_cpucontext_init()) == 0) {
487 error = mp_start_cpu_common(cp, B_FALSE);
488 mach_cpucontext_fini();
490 if (error != 0) {
491 affinity_clear();
492 return (error);
495 /* Wait for the target cpu to reach READY state. */
496 tempset = cpu_ready_set;
497 while (!CPU_IN_SET(tempset, cpuid)) {
498 delay(1);
499 tempset = *((volatile cpuset_t *)&cpu_ready_set);
502 /* Mark the target CPU as available for mp operation. */
503 CPUSET_ATOMIC_ADD(mp_cpus, cpuid);
505 /* Free the space allocated to hold the microcode file */
506 ucode_cleanup();
508 affinity_clear();
510 return (0);
513 #define MP_CPU_DETACH_MAX_TRIES 5
514 #define MP_CPU_DETACH_DELAY 100
516 static int
517 mp_cpu_detach_driver(dev_info_t *dip)
519 int i;
520 int rv = EBUSY;
521 dev_info_t *pdip;
523 pdip = ddi_get_parent(dip);
524 ASSERT(pdip != NULL);
526 * Check if caller holds pdip busy - can cause deadlocks in
527 * e_ddi_branch_unconfigure(), which calls devfs_clean().
529 if (DEVI_BUSY_OWNED(pdip)) {
530 return (EDEADLOCK);
533 for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
534 if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
535 rv = 0;
536 break;
538 DELAY(MP_CPU_DETACH_DELAY);
541 return (rv);
545 * Power off the target CPU.
546 * Note: cpu_lock will be released and then reacquired.
549 mp_cpu_poweroff(struct cpu *cp)
551 int rv = 0;
552 void *ctx;
553 dev_info_t *dip = NULL;
554 rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
555 extern void cpupm_start(cpu_t *);
556 extern void cpupm_stop(cpu_t *);
558 ASSERT(cp != NULL);
559 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
560 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
562 if (use_mp == 0 || plat_dr_support_cpu() == 0) {
563 return (ENOTSUP);
566 * There is no support for powering off cpu0 yet.
567 * There are many pieces of code which have a hard dependency on cpu0.
569 if (cp->cpu_id == 0) {
570 return (ENOTSUP);
573 if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
574 return (ENXIO);
576 ASSERT(dip != NULL);
577 if (mp_cpu_detach_driver(dip) != 0) {
578 rv = EBUSY;
579 goto out_online;
582 /* Allocate CPU context for stopping */
583 if (mach_cpucontext_init() != 0) {
584 rv = ENXIO;
585 goto out_online;
587 ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
588 if (ctx == NULL) {
589 rv = ENXIO;
590 goto out_context_fini;
593 cpupm_stop(cp);
594 cpu_event_fini_cpu(cp);
596 if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
597 cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
598 cp->cpu_m.mcpu_cmi_hdl = NULL;
601 rv = mach_cpu_stop(cp, ctx);
602 if (rv != 0) {
603 goto out_enable_cmi;
606 /* Wait until the target CPU has been halted. */
607 while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
608 delay(1);
610 rm->rm_cpu_halted = 0xffff;
612 /* CPU_READY has been cleared by mach_cpu_stop. */
613 ASSERT((cp->cpu_flags & CPU_READY) == 0);
614 ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
615 cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
616 CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);
618 mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
619 mach_cpucontext_fini();
621 return (0);
623 out_enable_cmi:
625 cmi_hdl_t hdl;
627 if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
628 cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
629 if (is_x86_feature(x86_featureset, X86FSET_MCA))
630 cmi_mca_init(hdl);
631 cp->cpu_m.mcpu_cmi_hdl = hdl;
634 cpu_event_init_cpu(cp);
635 cpupm_start(cp);
636 mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);
638 out_context_fini:
639 mach_cpucontext_fini();
641 out_online:
642 (void) e_ddi_branch_configure(dip, NULL, 0);
644 if (rv != EAGAIN && rv != ETIME) {
645 rv = ENXIO;
648 return (rv);
652 * Return vcpu state, since this could be a virtual environment that we
653 * are unaware of, return "unknown".
655 /* ARGSUSED */
657 vcpu_on_pcpu(processorid_t cpu)
659 return (VCPU_STATE_UNKNOWN);