/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Platform specific implementation code
 * Currently only suspend to RAM is supported (ACPI S3)
 */
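/*
 * Summary of the flow implemented below: on suspend, i_cpr_power_down()
 * saves the boot CPU's context with wc_save_context() and asks the
 * platform power manager (PPM) to enter ACPI S3, passing the physical
 * address of the wakecode page.  After wakeup, execution returns from
 * wc_save_context() a second time; i_cpr_pre_resume_cpus() then restarts
 * the remaining CPUs one at a time through the real-mode platter page
 * and the BIOS warm reset vector.
 */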
#define	SUNDDI_IMPL
#include <sys/types.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/cpuvar.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/as.h>
#include <sys/cpr.h>
#include <sys/kmem.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <vm/seg_kmem.h>
#include <sys/cpu_module.h>
#include <sys/callb.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/stack.h>
#include <sys/fs/ufs_fs.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/thread.h>
#include <sys/x_call.h>
#include <sys/smp_impldefs.h>
#include <vm/vm_dep.h>
#include <sys/psm.h>
#include <sys/epm.h>
#include <sys/cpr_wakecode.h>
#include <sys/x86_archext.h>
#include <sys/reboot.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/fp.h>
#include <sys/sysmacros.h>
#define	AFMT	"%lx"

extern int	flushes_require_xcalls;
extern cpuset_t	cpu_ready_set;

extern void	*wc_long_mode_64(void);
extern int	tsc_gethrtime_enable;
extern void	i_cpr_start_cpu(void);

ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

static wc_cpu_t	*wc_other_cpus = NULL;
static cpuset_t	procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

static void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);
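/*
 * Map the kthread stack fields to logical start/end addresses.  With
 * STACK_GROWTH_DOWN (the x86 case), t_stkbase is the lowest address and
 * t_stk the highest, so the save/restore routines below can always copy
 * the range [start, end) regardless of the direction of stack growth.
 */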
#ifdef STACK_GROWTH_DOWN
#define	CPR_GET_STACK_START(t)	((t)->t_stkbase)
#define	CPR_GET_STACK_END(t)	((t)->t_stk)
#else
#define	CPR_GET_STACK_START(t)	((t)->t_stk)
#define	CPR_GET_STACK_END(t)	((t)->t_stkbase)
#endif	/* STACK_GROWTH_DOWN */
/*
 * restart paused slave cpus
 */
void
i_cpr_machdep_setup(void)
{
	if (ncpus > 1) {
		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
		mutex_enter(&cpu_lock);
		start_cpus();
		mutex_exit(&cpu_lock);
	}
}
/*
 * Stop all interrupt activities in the system
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}

/*
 * Set machine up to take interrupts
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled = 0;
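	/*
	 * This hook is unused in the x86 S3 path; the ASSERT fires on
	 * DEBUG kernels if it is ever reached.
	 */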
	ASSERT(notcalled);
}
void
i_cpr_set_tbr(void)
{
}

processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}

/*
 * cpu0 should contain bootcpu info
 */
cpu_t *
i_cpr_bootcpu(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (cpu_get(i_cpr_bootcpuid()));
}
/*
 * Save context for the specified CPU
 */
void *
i_cpr_save_context(void *arg)
{
	long		index = (long)arg;
	psm_state_request_t *papic_state;
	int		resuming;
	int		ret;
	wc_cpu_t	*wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);
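	/*
	 * Snapshot this thread's stack; the resume path may clobber the
	 * live stack before normal execution continues, so
	 * i_cpr_restore_stack() copies this snapshot back on resume.
	 */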
	i_cpr_save_stack(curthread, wc_cpu);
	/*
	 * wc_save_context() returns twice, once when suspending and
	 * once when resuming; it returns non-zero on the initial
	 * (suspend) pass and 0 upon resume, matching its use in
	 * i_cpr_power_down() below.
	 */
	resuming = (wc_save_context(wc_cpu) == 0);
	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {
		/*
		 * Disable interrupts on this CPU so that PSM knows not to
		 * bind interrupts here on resume until the CPU has executed
		 * cpu_enable_intr() (above) in the resume path.
		 * We explicitly do not grab cpu_lock here because at this
		 * point in the suspend process, the boot cpu owns cpu_lock
		 * and all other cpus are also executing in the pause thread
		 * (only modifying their respective CPU structure).
		 */
		(void) cpu_disable_intr(CPU);
	}

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	return (NULL);
}
static ushort_t *warm_reset_vector = NULL;
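/*
 * The warm reset vector is a real-mode offset:segment pair kept in the
 * BIOS data area (conventionally 40:67).  Pointing it at the rm_platter
 * page makes an INIT-reset AP begin execution in the wakecode copied
 * there: the first word written below is the code offset within the
 * page, the second is the page's real-mode segment (physical address
 * shifted right by 4).
 */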
static ushort_t *
map_warm_reset_vector()
{
	/*LINTED*/
	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
		return (NULL);

	/*
	 * setup secondary cpu bios boot up vector
	 */
	*warm_reset_vector = (ushort_t)((caddr_t)
	    /*LINTED*/
	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
	    + ((ulong_t)rm_platter_va & 0xf));
	warm_reset_vector++;
	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);

	--warm_reset_vector;
	return (warm_reset_vector);
}
void
i_cpr_pre_resume_cpus()
{
	/*
	 * this is a cut down version of start_other_cpus()
	 * just do the initialization to wake the other cpus
	 */
	unsigned	who;
	int		boot_cpuid = i_cpr_bootcpuid();
	uint32_t	code_length = 0;
	caddr_t		wakevirt = rm_platter_va;
	/*LINTED*/
	wakecode_t	*wp = (wakecode_t *)wakevirt;
	char		*str = "i_cpr_pre_resume_cpus";
	extern int	get_tsc_ready();
	int		err;

	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * If startup wasn't able to find a page under 1M, we cannot
	 * proceed.
	 */
	if (rm_platter_va == 0) {
		cmn_err(CE_WARN, "Cannot suspend the system because no "
		    "memory below 1M could be found for processor startup");
		return;
	}

	/*
	 * Copy the real mode code at "real_mode_start" to the
	 * page at rm_platter_va.
	 */
	warm_reset_vector = map_warm_reset_vector();
	if (warm_reset_vector == NULL) {
		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
		return;
	}

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave
	 * CPUs do their TSC syncs with the same CPU.
	 */
	affinity_set(CPU_CURRENT);

	/*
	 * Mark the boot cpu as being ready and in the procset, since we are
	 * running on that cpu.
	 */
	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
	CPUSET_ONLY(procset, boot_cpuid);

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t	*cpup = wc_other_cpus + who;
		wc_desctbr_t	gdt;

		if (who == boot_cpuid)
			continue;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))

		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;
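		/*
		 * code_length is the offset of wc_long_mode_64 within the
		 * copied wakecode; init_real_mode_platter() adds it to the
		 * platter's physical address to form the identity-mapped
		 * long-mode entry point.
		 */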
		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
		    (uintptr_t)wc_rm_start);

		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);

		mutex_enter(&cpu_lock);
		err = mach_cpuid_start(who, rm_platter_va);
		mutex_exit(&cpu_lock);
		if (err != 0) {
			cmn_err(CE_WARN, "cpu%d: failed to start during "
			    "suspend/resume error %d", who, err);
			continue;
		}

		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))

		if (!wait_for_set(&procset, who))
			continue;

		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))

		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))

		if (tsc_gethrtime_enable) {
			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
			tsc_sync_master(who);
		}

		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
		    who))
		/*
		 * Wait for cpu to declare that it is ready, we want the
		 * cpus to start serially instead of in parallel, so that
		 * they do not contend with each other in wc_rm_start()
		 */
		if (!wait_for_set(&cpu_ready_set, who))
			continue;

		/*
		 * do not need to re-initialize dtrace using dtrace_cpu_init
		 * function
		 */
		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
	}

	affinity_clear();

	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
}
static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}
/*
 * We need to setup a 1:1 (virtual to physical) mapping for the
 * page containing the wakeup code.
 */
static struct as *save_as;	/* when switching to kas */

static void
unmap_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t	wp = (uintptr_t)wakephys;
	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
}
void
i_cpr_post_resume_cpus()
{
	uint64_t	wakephys = rm_platter_pa;

	if (warm_reset_vector != NULL)
		unmap_warm_reset_vector(warm_reset_vector);

	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    HAT_UNLOAD);

	/*
	 * cmi_post_mpstartup() is only required upon boot not upon
	 * resume from RAM
	 */

	PT(PT_UNDO1to1);
	/* Tear down 1:1 mapping for wakeup code */
	unmap_wakeaddr_1to1(wakephys);
}
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}

int
i_cpr_reusable_supported(void)
{
	return (0);
}
static void
map_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t	wp = (uintptr_t)wakephys;
	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
	    HAT_LOAD);
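	/*
	 * Switch this CPU onto the kernel-only HAT so the identity
	 * mapping installed above stays in effect regardless of the
	 * current process; remember the process address space so
	 * unmap_wakeaddr_1to1() can switch back.
	 */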
	save_as = curthread->t_procp->p_as;
	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
}
void
prt_other_cpus()
{
	int	who;

	if (ncpus == 1) {
		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
		    "uniprocessor machine\n"))
		return;
	}

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t	*cpup = wc_other_cpus + who;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
		    AFMT ", sp=%lx\n", who,
		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
	}
}
/*
 * Power down the system.
 */
int
i_cpr_power_down(int sleeptype)
{
	caddr_t		wakevirt = rm_platter_va;
	uint64_t	wakephys = rm_platter_pa;
	ulong_t		saved_intr;
	uint32_t	code_length = 0;
	wc_desctbr_t	gdt;
	/*LINTED*/
	wakecode_t	*wp = (wakecode_t *)wakevirt;
	/*LINTED*/
	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
	wc_cpu_t	*cpup = &(wp->wc_cpu);
	dev_info_t	*ppm;
	int		ret = 0;
	power_req_t	power_req;
	char		*str = "i_cpr_power_down";
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
	extern int	cpr_suspend_succeeded;
	extern void	kernel_wc_code();

	ASSERT(sleeptype == CPR_TORAM);
	ASSERT(CPU->cpu_id == 0);

	if ((ppm = PPM(ddi_root_node())) == NULL) {
		PMD(PMD_SX, ("%s: root node not claimed\n", str))
		return (ENOTTY);
	}

	PMD(PMD_SX, ("Entering %s()\n", str))

	PT(PT_IC);
	saved_intr = intr_clear();

	PT(PT_1to1);
	/* Setup 1:1 mapping for wakeup code */
	map_wakeaddr_1to1(wakephys);

	PMD(PMD_SX, ("ncpus=%d\n", ncpus))

	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
	    ((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)),
	    WC_CODESIZE))

	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
	    (void *)wakevirt, (uint_t)wakephys))

	ASSERT(((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)) <
	    WC_CODESIZE);

	bzero(wakevirt, PAGESIZE);

	/* Copy code to rm_platter */
	bcopy((caddr_t)wc_rm_start, wakevirt,
	    (size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start));

	prt_other_cpus();

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))

	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	real_mode_platter->rm_cr4 = getcr4();
	real_mode_platter->rm_pdbr = getcr3();

	rmp_gdt_init(real_mode_platter);

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
	    (uint32_t)((uintptr_t)wc_long_mode_64 - (uintptr_t)wc_rm_start);

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
	    (ulong_t)real_mode_platter->rm_longmode64_addr))

	PT(PT_SC);
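	/*
	 * wc_save_context() behaves like setjmp: it returns non-zero here
	 * on the initial save pass, in which case we enter S3 below, and
	 * 0 when the wakecode resumes us, in which case the else branch
	 * finishes the resume.
	 */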
	if (wc_save_context(cpup)) {

		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
		if (ret != 0)
			return (ret);

		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
		if (ret != 0)
			return (ret);

		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
		    (uint_t)wakephys, (void *)&kernel_wc_code))
		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
		    cpup->wc_esp))
		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
		    (long)cpup->wc_cr4))
		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
		    (long)cpup->wc_eflags))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)cpup->wc_gdt_base,
		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
		    (uintptr_t)wc_rm_start);

		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);

		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
		    (ulong_t)wcpp->rm_cr4, getcr4()))

		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
		    (ulong_t)wcpp->rm_pdbr, getcr3()))

		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
		    (ulong_t)wcpp->rm_longmode64_addr))

		PMD(PMD_SX,
		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase))

		power_req.request_type = PMR_PPM_ENTER_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
		power_req.req.ppm_power_enter_sx_req.test_point =
		    cpr_test_point;
		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		/*
		 * If it works, we get control back to the else branch below
		 * If we get control back here, it didn't work.
		 * XXX return EINVAL here?
		 */
		unmap_wakeaddr_1to1(wakephys);
		intr_restore(saved_intr);

		return (ret);
	} else {
		cpr_suspend_succeeded = 1;

		power_req.request_type = PMR_PPM_EXIT_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
		/*
		 * the restore should never fail, if the save succeeded
		 */
		ASSERT(ret == 0);

		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));

		/*
		 * Enable interrupts on boot cpu.
		 */
		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);
	}

	PT(PT_INTRRESTORE);
	intr_restore(saved_intr);
	PT(PT_CPU);

	return (ret);
}
/*
 * Stop all other cpu's before halting or rebooting. We pause the cpu's
 * instead of sending a cross call.
 * Stolen from sun4/os/mp_states.c
 */

static int cpu_are_paused;	/* sic */

void
i_cpr_stop_other_cpus(void)
{
	mutex_enter(&cpu_lock);
	if (cpu_are_paused) {
		mutex_exit(&cpu_lock);
		return;
	}
	pause_cpus(NULL, NULL);
	cpu_are_paused = 1;

	mutex_exit(&cpu_lock);
}
int
i_cpr_is_supported(int sleeptype)
{
	extern int	cpr_supported_override;
	extern int	cpr_platform_enable;
	extern int	pm_S3_enabled;

	if (sleeptype != CPR_TORAM)
		return (0);

	/*
	 * The next statement tests if a specific platform has turned off
	 * cpr support.
	 */
	if (cpr_supported_override)
		return (0);

	/*
	 * If a platform has specifically turned on cpr support ...
	 */
	if (cpr_platform_enable)
		return (1);

	return (pm_S3_enabled);
}
void
i_cpr_bitmap_cleanup(void)
{
}

void
i_cpr_free_memory_resources(void)
{
}
/*
 * Needed only for S3 so far
 */
static int
i_cpr_platform_alloc(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_platform_alloc";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_ALLOC;
	return ((*psm_state)(req));
}
/*
 * Needed only for S3 so far
 */
static void
i_cpr_platform_free(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_platform_free";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return;
	}

	req->psr_cmd = PSM_STATE_FREE;
	(void) (*psm_state)(req);
}
static int
i_cpr_save_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_save_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_SAVE;
	return ((*psm_state)(req));
}
static int
i_cpr_restore_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_restore_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_RESTORE;
	return ((*psm_state)(req));
}
static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
{
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * Fill up the real mode platter to make it easy for real mode code to
	 * kick it off. This area should really be one passed by boot to kernel
	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
	 * have identical physical and virtual address in paged mode.
	 */

	real_mode_platter->rm_pdbr = getcr3();
	real_mode_platter->rm_cpu = cpun;
	real_mode_platter->rm_cr4 = cr4;

	real_mode_platter->rm_gdt_base = gdt.base;
	real_mode_platter->rm_gdt_lim = gdt.limit;

	if (getcr3() > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%llx).",
		    (unsigned long long)getcr3());
	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
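	/*
	 * 0x20980000000000 decodes as: access byte 0x98 (Present, DPL 0,
	 * S=1, execute-only code type) in bits 40-47, and L=1 (64-bit)
	 * in bit 53; base and limit are left zero, which long mode
	 * ignores anyway.
	 */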
	real_mode_platter->rm_temp_gdt[0] = 0ULL;
	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
	    offsetof(rm_platter_t, rm_temp_gdt);

	real_mode_platter->rm_temp_idt_lim = 0;
	real_mode_platter->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;

	/* return; */
}
void
i_cpr_start_cpu(void)
{

	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to Sync PAT with cpu0's PAT. We have to do
	 * this with interrupts disabled.
	 */
	pat_sync();

	/*
	 * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
	 */
	if (fp_save_mech == FP_XSAVE) {
		setup_xfem();
	}

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
	 * init_cpu_info(), since the work that they do is only needed to
	 * be done once at boot time
	 */
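	/*
	 * Adding ourselves to procset releases the boot CPU, which is
	 * blocked in wait_for_set(&procset, ...) in
	 * i_cpr_pre_resume_cpus().
	 */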
	mutex_enter(&cpu_lock);
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU.  This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous
	 * systems) need to go load another CPU module.  The act of
	 * attempting to load a module may trigger a cross-call, which
	 * will ASSERT unless this cpu is CPU_READY.
	 */

	/*
	 * cmi has already been initialized (during boot), so it does not
	 * need to be done again
	 */
#ifdef PM_REINITMCAONRESUME
	if (is_x86_feature(x86_featureset, X86FSET_MCA))
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))

	/* return; */
}
void
i_cpr_alloc_cpus(void)
{
	char *str = "i_cpr_alloc_cpus";

	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	/*
	 * we allocate this only when we actually need it to save on
	 * kernel memory
	 */

	if (wc_other_cpus == NULL) {
		wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
		    KM_SLEEP);
	}
}
void
i_cpr_free_cpus(void)
{
	int index;
	wc_cpu_t *wc_cpu;

	if (wc_other_cpus != NULL) {
		for (index = 0; index < max_ncpus; index++) {
			wc_cpu = wc_other_cpus + index;
			if (wc_cpu->wc_saved_stack != NULL) {
				kmem_free(wc_cpu->wc_saved_stack,
				    wc_cpu->wc_saved_stack_size);
			}
		}

		kmem_free((void *) wc_other_cpus,
		    max_ncpus * sizeof (wc_cpu_t));
		wc_other_cpus = NULL;
	}
}
/*
 * wrapper for acpica_ddi_save_resources()
 */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	acpica_ddi_save_resources(dip);
}

/*
 * wrapper for acpica_ddi_restore_resources()
 */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	acpica_ddi_restore_resources(dip);
}
static int
wait_for_set(cpuset_t *set, int who)
{
	int delays;
	char *str = "wait_for_set";
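	/*
	 * Poll every 10ms: warn after 500 polls (about five seconds) and
	 * give up after 2000 polls (about twenty seconds).
	 */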
	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably
			 * looking a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", who);
			PMD(PMD_SX, ("%s() %d cpu started "
			    "but not running in the kernel yet\n",
			    str, who))
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds, bail ..
			 */
			cmn_err(CE_WARN, "cpu%d: timed out", who);
			PMD(PMD_SX, ("%s() %d cpu timed out\n",
			    str, who))
			return (0);
		}

		/*
		 * wait at least 10ms, then check again ..
		 */
		drv_usecwait(10000);
	}

	return (1);
}
static void
i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
{
	size_t	stack_size;	/* size of stack */
	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;
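	/*
	 * Grow the per-CPU save buffer on demand; it is reused across
	 * suspend/resume cycles and not freed until i_cpr_free_cpus().
	 */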
	if (wc_cpu->wc_saved_stack_size < stack_size) {
		if (wc_cpu->wc_saved_stack != NULL) {
			kmem_free(wc_cpu->wc_saved_stack,
			    wc_cpu->wc_saved_stack_size);
		}
		wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
		wc_cpu->wc_saved_stack_size = stack_size;
	}

	bcopy(start, wc_cpu->wc_saved_stack, stack_size);
}
void
i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
{
	size_t	stack_size;	/* size of stack */
	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	bcopy(save_stack, start, stack_size);
}