src/cpu/intel/haswell/mp_init.c
/*
 * This file is part of the coreboot project.
 *
 * Copyright (C) 2013 ChromeOS Authors
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; version 2 of
 * the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
 * MA 02110-1301 USA
 */

#include <console/console.h>
#include <stdint.h>
#include <rmodule.h>
#include <arch/cpu.h>
#include <cpu/cpu.h>
#include <cpu/intel/microcode.h>
#include <cpu/x86/cache.h>
#include <cpu/x86/lapic.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/mtrr.h>
#include <cpu/x86/smm.h>
#include <delay.h>
#include <device/device.h>
#include <device/path.h>
#include <lib.h>
#include <smp/atomic.h>
#include <smp/spinlock.h>
#include <thread.h>
#include "haswell.h"

/* This needs to match the layout in the .module_parameters section. */
struct sipi_params {
	u16 gdtlimit;
	u32 gdt;
	u16 unused;
	u32 idt_ptr;
	u32 stack_top;
	u32 stack_size;
	u32 microcode_ptr;
	u32 msr_table_ptr;
	u32 msr_count;
	u32 c_handler;
	u32 c_handler_arg;
	u8 apic_to_cpu_num[CONFIG_MAX_CPUS];
} __attribute__((packed));
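
/*
 * Descriptive note (an assumption about the assembly stub, not verified
 * here): the SIPI rmodule consumes these fields directly -- gdtlimit/gdt
 * form the pseudo-descriptor handed to lgdt, stack_top/stack_size carve a
 * per-CPU stack, and c_handler/c_handler_arg locate the C entry point
 * (ap_init below). The authoritative layout is the .module_parameters
 * section in the sipi_vector assembly.
 */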

/* This also needs to match the assembly code for saved MSR encoding. */
struct saved_msr {
	u32 index;
	u32 lo;
	u32 hi;
} __attribute__((packed));
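
/*
 * Each saved_msr entry is 12 bytes. The assembly stub walks the mirrored
 * table and replays every entry before entering C, roughly equivalent to
 * this sketch (illustrative only; the real loop lives in assembly):
 *
 *	for (i = 0; i < msr_count; i++) {
 *		msr.lo = table[i].lo;
 *		msr.hi = table[i].hi;
 *		wrmsr(table[i].index, msr);
 *	}
 */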

/* The SIPI vector rmodule is included in the ramstage using 'objcopy -B'. */
extern char _binary_sipi_vector_start[];
/* These symbols are defined in c_start.S. */
extern char gdt[];
extern char gdt_end[];
extern char idtarg[];

/* This table keeps track of each CPU's APIC id. */
static u8 apic_id_table[CONFIG_MAX_CPUS];
static device_t cpu_devs[CONFIG_MAX_CPUS];

/* Number of APs that have checked in. */
static atomic_t num_aps;
/* Number of APs that have relocated their SMM handler. */
static atomic_t num_aps_relocated_smm;
/* Barrier to stop APs from performing SMM relocation. */
static int smm_relocation_barrier_begin __attribute__ ((aligned (64)));
/* Determine if hyperthreading is disabled. */
int ht_disabled;

static inline void mfence(void)
{
	__asm__ __volatile__("mfence\t\n": : :"memory");
}

static inline void wait_for_barrier(volatile int *barrier)
{
	while (*barrier == 0) {
		asm ("pause");
	}
}

static inline void release_barrier(volatile int *barrier)
{
	*barrier = 1;
}
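
/*
 * Note: these barrier helpers lean on x86's strong store ordering plus the
 * volatile qualifier (which stops the compiler from caching the flag); for
 * a one-shot, single-writer release like this, no atomic read-modify-write
 * is required.
 */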

static void ap_wait_for_smm_relocation_begin(void)
{
	wait_for_barrier(&smm_relocation_barrier_begin);
}

/* This function pointer is used by the non-BSP CPUs to initiate relocation. It
 * points to either a serial or parallel SMM initiation. */
static void (*ap_initiate_smm_relocation)(void) = &smm_initiate_relocation;

/* Returns 1 on timeout waiting for APs; 0 once the target count has
 * checked in. */
static int wait_for_aps(atomic_t *val, int target, int total_delay,
			int delay_step)
{
	int timeout = 0;
	int delayed = 0;

	while (atomic_read(val) != target) {
		udelay(delay_step);
		delayed += delay_step;
		if (delayed >= total_delay) {
			timeout = 1;
			break;
		}
	}

	return timeout;
}

void release_aps_for_smm_relocation(int do_parallel)
{
	/* Change the AP SMM initiation function, and ensure it is visible
	 * before releasing the APs. */
	if (do_parallel) {
		ap_initiate_smm_relocation = &smm_initiate_relocation_parallel;
		mfence();
	}

	release_barrier(&smm_relocation_barrier_begin);

	/* Wait for CPUs to relocate their SMM handler up to 100ms. */
	if (wait_for_aps(&num_aps_relocated_smm, atomic_read(&num_aps),
			 100000 /* 100 ms */, 200 /* us */))
		printk(BIOS_DEBUG, "Timed out waiting for AP SMM relocation\n");
}
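
/*
 * Typical BSP call sequence (illustrative sketch only -- the actual caller
 * is the platform CPU driver, and the variable names here are assumed):
 *
 *	if (setup_ap_init(cpu_bus, &max_cpus, microcode_patch) < 0)
 *		return;
 *	if (start_aps(cpu_bus, max_cpus) < 0)
 *		return;
 *	smm_initiate_relocation();		// BSP relocates its own handler
 *	release_aps_for_smm_relocation(0);	// then releases the waiting APs
 */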

/* By the time APs call ap_init() caching has been setup, and microcode has
 * been loaded. */
static void asmlinkage ap_init(unsigned int cpu, void *microcode_ptr)
{
	struct cpu_info *info;

	/* Signal that the AP has arrived. */
	atomic_inc(&num_aps);

	/* Ensure the local APIC is enabled. */
	enable_lapic();

	info = cpu_info();
	info->index = cpu;
	info->cpu = cpu_devs[cpu];
	thread_init_cpu_info_non_bsp(info);

	apic_id_table[info->index] = lapicid();
	info->cpu->path.apic.apic_id = apic_id_table[info->index];

	/* Call through the cpu driver's initialization. */
	cpu_initialize(info->index);

	ap_wait_for_smm_relocation_begin();

	ap_initiate_smm_relocation();

	/* Indicate that SMM relocation has occurred on this thread. */
	atomic_inc(&num_aps_relocated_smm);

	/* After SMM relocation a 2nd microcode load is required. */
	intel_microcode_load_unlocked(microcode_ptr);

	/* FIXME(adurbin): park CPUs properly -- preferably somewhere in a
	 * reserved part of memory that the OS cannot get to. */
	stop_this_cpu();
}

static void setup_default_sipi_vector_params(struct sipi_params *sp)
{
	int i;
	u8 apic_id;
	u8 apic_id_inc;

	sp->gdt = (u32)&gdt;
	sp->gdtlimit = (u32)&gdt_end - (u32)&gdt - 1;
	sp->idt_ptr = (u32)&idtarg;
	sp->stack_size = CONFIG_STACK_SIZE;
	sp->stack_top = (u32)&_estack;
	/* Adjust the stack top to take into account cpu_info. */
	sp->stack_top -= sizeof(struct cpu_info);

	/* Default to linear APIC id space if HT is enabled. If it is
	 * disabled the APIC ids increase by 2 as the odd numbered APIC
	 * ids are not present. */
	apic_id_inc = (ht_disabled) ? 2 : 1;
	for (i = 0, apic_id = 0; i < CONFIG_MAX_CPUS; i++) {
		sp->apic_to_cpu_num[i] = apic_id;
		apic_id += apic_id_inc;
	}
}
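
/*
 * Resulting map, taking CONFIG_MAX_CPUS=4 as an example: with HT enabled,
 * cpus 0,1,2,3 map to APIC ids 0,1,2,3; with HT disabled they map to APIC
 * ids 0,2,4,6, since the odd ids belong to the absent sibling threads.
 */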

#define NUM_FIXED_MTRRS 11
static unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
	MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR,
	MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR,
	MTRRfix4K_D8000_MSR, MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR,
	MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR,
};
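
/*
 * Together the 11 fixed-range MTRRs above cover the first 1MiB of address
 * space: one 64KiB-granular register for 0x00000-0x7ffff, two 16KiB-granular
 * registers for 0x80000-0xbffff, and eight 4KiB-granular registers for
 * 0xc0000-0xfffff.
 */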

static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
{
	msr_t msr;

	msr = rdmsr(index);
	entry->index = index;
	entry->lo = msr.lo;
	entry->hi = msr.hi;

	/* Return the next entry. */
	entry++;
	return entry;
}

static int save_bsp_msrs(char *start, int size)
{
	int msr_count;
	int num_var_mtrrs;
	struct saved_msr *msr_entry;
	int i;
	msr_t msr;

	/* Determine the number of MTRRs that need to be saved. */
	msr = rdmsr(MTRRcap_MSR);
	num_var_mtrrs = msr.lo & 0xff;

	/* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE. */
	msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;

	if ((msr_count * sizeof(struct saved_msr)) > size) {
		printk(BIOS_CRIT, "Cannot mirror all %d msrs.\n", msr_count);
		return -1;
	}

	msr_entry = (void *)start;
	for (i = 0; i < NUM_FIXED_MTRRS; i++) {
		msr_entry = save_msr(fixed_mtrrs[i], msr_entry);
	}

	for (i = 0; i < num_var_mtrrs; i++) {
		msr_entry = save_msr(MTRRphysBase_MSR(i), msr_entry);
		msr_entry = save_msr(MTRRphysMask_MSR(i), msr_entry);
	}

	msr_entry = save_msr(MTRRdefType_MSR, msr_entry);

	return msr_count;
}
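
/*
 * Sizing example: assuming a part that reports 10 variable MTRRs, the table
 * holds 2*10 + 11 + 1 = 32 entries at 12 bytes each, i.e. 384 bytes --
 * comfortably within the space left over after the SIPI module.
 */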

/* The SIPI vector is loaded at SMM_DEFAULT_BASE. The reason is that the
 * memory range is already reserved so the OS cannot use it. That region is
 * free to use for AP bringup before SMM is initialized. */
static u32 sipi_vector_location = SMM_DEFAULT_BASE;
static int sipi_vector_location_size = SMM_DEFAULT_SIZE;
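
/*
 * For reference (assuming the usual 0x30000 legacy value for
 * SMM_DEFAULT_BASE): the startup vector byte sent with the SIPIs in
 * start_aps() works out to 0x30000 >> 12 = 0x30.
 */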

static int load_sipi_vector(const void *microcode_patch)
{
	struct rmodule sipi_mod;
	int module_size;
	int num_msrs;
	struct sipi_params *sp;
	char *mod_loc = (void *)sipi_vector_location;
	const int loc_size = sipi_vector_location_size;

	if (rmodule_parse(&_binary_sipi_vector_start, &sipi_mod)) {
		printk(BIOS_CRIT, "Unable to parse sipi module.\n");
		return -1;
	}

	if (rmodule_entry_offset(&sipi_mod) != 0) {
		printk(BIOS_CRIT, "SIPI module entry offset is not 0!\n");
		return -1;
	}

	if (rmodule_load_alignment(&sipi_mod) != 4096) {
		printk(BIOS_CRIT, "SIPI module load alignment(%d) != 4096.\n",
		       rmodule_load_alignment(&sipi_mod));
		return -1;
	}

	module_size = rmodule_memory_size(&sipi_mod);

	/* Align to 4 bytes. */
	module_size += 3;
	module_size &= ~3;

	if (module_size > loc_size) {
		printk(BIOS_CRIT, "SIPI module size (%d) > region size (%d).\n",
		       module_size, loc_size);
		return -1;
	}

	num_msrs = save_bsp_msrs(&mod_loc[module_size], loc_size - module_size);

	if (num_msrs < 0) {
		printk(BIOS_CRIT, "Error mirroring BSP's msrs.\n");
		return -1;
	}

	if (rmodule_load(mod_loc, &sipi_mod)) {
		printk(BIOS_CRIT, "Unable to load SIPI module.\n");
		return -1;
	}

	sp = rmodule_parameters(&sipi_mod);

	if (sp == NULL) {
		printk(BIOS_CRIT, "SIPI module has no parameters.\n");
		return -1;
	}

	setup_default_sipi_vector_params(sp);
	/* Setup MSR table. */
	sp->msr_table_ptr = (u32)&mod_loc[module_size];
	sp->msr_count = num_msrs;
	/* Provide pointer to microcode patch. */
	sp->microcode_ptr = (u32)microcode_patch;
	/* The microcode pointer is passed on through to the c handler so
	 * that it can be loaded again after SMM relocation. */
	sp->c_handler_arg = (u32)microcode_patch;
	sp->c_handler = (u32)&ap_init;

	/* Make sure SIPI vector hits RAM so the APs that come up will see
	 * the startup code even if the caches are disabled. */
	wbinvd();

	return 0;
}
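
/*
 * Resulting layout inside the SMM_DEFAULT_BASE/SIZE window (descriptive):
 *
 *	mod_loc + 0            relocated SIPI rmodule (entry at offset 0)
 *	mod_loc + module_size  mirrored BSP MSR table (saved_msr entries)
 */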

static int allocate_cpu_devices(struct bus *cpu_bus, int *total_hw_threads)
{
	int i;
	int num_threads;
	int num_cores;
	int max_cpus;
	struct cpu_info *info;
	msr_t msr;

	info = cpu_info();
	cpu_devs[info->index] = info->cpu;
	apic_id_table[info->index] = info->cpu->path.apic.apic_id;

	msr = rdmsr(CORE_THREAD_COUNT_MSR);
	num_threads = (msr.lo >> 0) & 0xffff;
	num_cores = (msr.lo >> 16) & 0xffff;
	printk(BIOS_DEBUG, "CPU has %u cores, %u threads enabled.\n",
	       num_cores, num_threads);

	max_cpus = num_threads;
	*total_hw_threads = num_threads;
	if (num_threads > CONFIG_MAX_CPUS) {
		printk(BIOS_CRIT, "CPU count(%d) exceeds CONFIG_MAX_CPUS(%d)\n",
		       num_threads, CONFIG_MAX_CPUS);
		max_cpus = CONFIG_MAX_CPUS;
	}

	/* Determine if hyperthreading is enabled. If not, the APIC id space
	 * is sparse with ids incrementing by 2 instead of 1. */
	ht_disabled = num_threads == num_cores;

	for (i = 1; i < max_cpus; i++) {
		struct device_path cpu_path;
		device_t new;

		/* Build the cpu device path. */
		cpu_path.type = DEVICE_PATH_APIC;
		cpu_path.apic.apic_id = info->cpu->path.apic.apic_id + i;
		if (ht_disabled)
			cpu_path.apic.apic_id = cpu_path.apic.apic_id * 2;

		/* Allocate the new cpu device structure. */
		new = alloc_find_dev(cpu_bus, &cpu_path);
		if (new == NULL) {
			printk(BIOS_CRIT, "Could not allocate cpu device\n");
			max_cpus--;
		}
		cpu_devs[i] = new;
	}

	return max_cpus;
}

int setup_ap_init(struct bus *cpu_bus, int *max_cpus,
		  const void *microcode_patch)
{
	int num_cpus;
	int hw_threads;

	/* Default to currently running CPU. */
	num_cpus = allocate_cpu_devices(cpu_bus, &hw_threads);

	/* Load the SIPI vector. */
	if (load_sipi_vector(microcode_patch))
		return -1;

	*max_cpus = num_cpus;

	if (num_cpus < hw_threads) {
		printk(BIOS_CRIT,
		       "ERROR: More HW threads (%d) than supported (%d).\n",
		       hw_threads, num_cpus);
		return -1;
	}

	return 0;
}

/* Returns 1 for timeout. 0 on success. */
static int apic_wait_timeout(int total_delay, int delay_step)
{
	int total = 0;
	int timeout = 0;

	while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
		udelay(delay_step);
		total += delay_step;
		if (total >= total_delay) {
			timeout = 1;
			break;
		}
	}

	return timeout;
}
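
/*
 * start_aps() below follows the classic INIT-SIPI-SIPI startup sequence
 * from Intel's MP documentation: assert INIT to all-but-self, wait ~10ms,
 * then send two STARTUP IPIs carrying the 4KiB page number of the relocated
 * SIPI module, polling the ICR delivery-status bit in between.
 */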

int start_aps(struct bus *cpu_bus, int ap_count)
{
	int sipi_vector;

	if (ap_count == 0)
		return 0;

	/* The vector is the 4KiB page number of the startup code, sent as
	 * a single byte. */
	sipi_vector = sipi_vector_location >> 12;

	if (sipi_vector > 255) {
		printk(BIOS_CRIT, "SIPI vector too large! 0x%08x\n",
		       sipi_vector);
		return -1;
	}

	printk(BIOS_DEBUG, "Attempting to start %d APs\n", ap_count);

	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			return -1;
		} else
			printk(BIOS_DEBUG, "done.\n");
	}

	/* Send INIT IPI to all but self. */
	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
			   LAPIC_DM_INIT);
	printk(BIOS_DEBUG, "Waiting for 10ms after sending INIT.\n");
	mdelay(10);

	/* Send 1st SIPI. */
	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			return -1;
		} else
			printk(BIOS_DEBUG, "done.\n");
	}

	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
			   LAPIC_DM_STARTUP | sipi_vector);
	printk(BIOS_DEBUG, "Waiting for 1st SIPI to complete...");
	if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) {
		printk(BIOS_DEBUG, "timed out.\n");
		return -1;
	} else {
		printk(BIOS_DEBUG, "done.\n");
	}
	/* Wait for CPUs to check in up to 200 us. */
	wait_for_aps(&num_aps, ap_count, 200 /* us */, 15 /* us */);

	/* Send 2nd SIPI. */
	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			return -1;
		} else
			printk(BIOS_DEBUG, "done.\n");
	}

	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
			   LAPIC_DM_STARTUP | sipi_vector);
	printk(BIOS_DEBUG, "Waiting for 2nd SIPI to complete...");
	if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) {
		printk(BIOS_DEBUG, "timed out.\n");
		return -1;
	} else {
		printk(BIOS_DEBUG, "done.\n");
	}

	/* Wait for CPUs to check in. */
	if (wait_for_aps(&num_aps, ap_count, 10000 /* 10 ms */, 50 /* us */)) {
		printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n",
		       atomic_read(&num_aps), ap_count);
		return -1;
	}

	return 0;
}
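
/*
 * Initiate SMM relocation by sending an SMI to this CPU itself through the
 * local APIC. The "parallel" variant does not serialize on the relocation
 * spinlock; smm_initiate_relocation() below wraps it with the lock for the
 * serial case.
 */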
void smm_initiate_relocation_parallel(void)
{
	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			return;
		} else
			printk(BIOS_DEBUG, "done.\n");
	}

	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(lapicid()));
	lapic_write_around(LAPIC_ICR, LAPIC_INT_ASSERT | LAPIC_DM_SMI);
	if (apic_wait_timeout(1000 /* 1 ms */, 100 /* us */)) {
		printk(BIOS_DEBUG, "SMI Relocation timed out.\n");
	} else
		printk(BIOS_DEBUG, "Relocation complete.\n");
}

DECLARE_SPIN_LOCK(smm_relocation_lock);

void smm_initiate_relocation(void)
{
	spin_lock(&smm_relocation_lock);
	smm_initiate_relocation_parallel();
	spin_unlock(&smm_relocation_lock);
}